github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/reconcile_test.go

package scheduler

import (
	"fmt"
	"reflect"
	"regexp"
	"strconv"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var (
	canaryUpdate = &structs.UpdateStrategy{
		Canary:          2,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	noCanaryUpdate = &structs.UpdateStrategy{
		MaxParallel:     4,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}
)

func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return true, false, nil
}

func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	return false, true, nil
}

func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
	// Create a shallow copy
	newAlloc := existing.CopySkipJob()
	newAlloc.TaskResources = make(map[string]*structs.Resources)

	// Use the new task resources but keep the network from the old
	for _, task := range newTG.Tasks {
		r := task.Resources.Copy()
		r.Networks = existing.TaskResources[task.Name].Networks
		newAlloc.TaskResources[task.Name] = r
	}

	return false, false, newAlloc
}

func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType {
	return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) {
		if fn, ok := handled[existing.ID]; ok {
			return fn(existing, newJob, newTG)
		}

		return unhandled(existing, newJob, newTG)
	}
}

var (
	// allocationIndexRegex is a regular expression to find the allocation index.
	allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)
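// Note (added for clarity; not in the original file): the fixtures above all
// satisfy allocUpdateType, whose results read as (ignore, destructive,
// inplaceAlloc): allocUpdateFnIgnore keeps the existing alloc as-is,
// allocUpdateFnDestructive forces a stop-and-replace, and allocUpdateFnInplace
// returns a mutated copy to update in place. allocUpdateFnMock composes these
// per allocation ID; a hypothetical use ("someAlloc" is assumed):
//
//	handled := map[string]allocUpdateType{
//		someAlloc.ID: allocUpdateFnInplace, // only this alloc is updated in place
//	}
//	updateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
//	reconciler := NewAllocReconciler(testlog.Logger(t), updateFn, false, job.ID, job, nil, allocs, nil, "")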
// allocNameToIndex returns the index of the allocation.
func allocNameToIndex(name string) uint {
	matches := allocationIndexRegex.FindStringSubmatch(name)
	if len(matches) != 2 {
		return 0
	}

	index, err := strconv.Atoi(matches[1])
	if err != nil {
		return 0
	}

	return uint(index)
}

func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) {
	t.Helper()
	m := make(map[uint]int)
	for _, i := range indexes {
		m[uint(i)] += 1
	}

	for _, n := range names {
		index := allocNameToIndex(n)
		val, contained := m[index]
		if !contained {
			t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names)
		}

		val--
		if val < 0 {
			t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names)
		}
		m[index] = val
	}

	for k, remainder := range m {
		if remainder != 0 {
			t.Fatalf("Index %d has %d expected uses remaining\nAll names: %v", k, remainder, names)
		}
	}
}

func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) {
	t.Helper()
	canaryIndex := make(map[string]struct{})
	for _, state := range d.TaskGroups {
		for _, c := range state.PlacedCanaries {
			canaryIndex[c] = struct{}{}
		}
	}

	for _, s := range stop {
		if _, ok := canaryIndex[s.alloc.ID]; ok {
			t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name)
		}
	}
}

func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numPrevious)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}

		if act := p.previousAlloc.Name; p.name != act {
			t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name)
		}
		found++
	}
	if numPrevious != found {
		t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found)
	}
}

func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) {
	t.Helper()
	names := make(map[string]struct{}, numRescheduled)

	found := 0
	for _, p := range place {
		if _, ok := names[p.name]; ok {
			t.Fatalf("Name %q already placed", p.name)
		}
		names[p.name] = struct{}{}

		if p.previousAlloc == nil {
			continue
		}
		if p.reschedule {
			found++
		}
	}
	if numRescheduled != found {
		t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found)
	}
}

func intRange(pairs ...int) []int {
	if len(pairs)%2 != 0 {
		return nil
	}

	var r []int
	for i := 0; i < len(pairs); i += 2 {
		for j := pairs[i]; j <= pairs[i+1]; j++ {
			r = append(r, j)
		}
	}
	return r
}

func placeResultsToNames(place []allocPlaceResult) []string {
	names := make([]string, 0, len(place))
	for _, p := range place {
		names = append(names, p.name)
	}
	return names
}

func destructiveResultsToNames(destructive []allocDestructiveResult) []string {
	names := make([]string, 0, len(destructive))
	for _, d := range destructive {
		names = append(names, d.placeName)
	}
	return names
}

func stopResultsToNames(stop []allocStopResult) []string {
	names := make([]string, 0, len(stop))
	for _, s := range stop {
		names = append(names, s.alloc.Name)
	}
	return names
}
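// Illustrative examples (added for clarity; not in the original file). Nomad
// allocation names have the form "<job>.<group>[<index>]", which is what
// allocNameToIndex parses, and intRange expands inclusive (start, end) pairs:
//
//	allocNameToIndex("example.cache[3]") // => 3
//	allocNameToIndex("no-index")         // => 0 (regex does not match)
//	intRange(0, 2, 5, 5)                 // => []int{0, 1, 2, 5}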
func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string {
	names := make([]string, 0, len(attributeUpdates))
	for _, a := range attributeUpdates {
		names = append(names, a.Name)
	}
	return names
}

func allocsToNames(allocs []*structs.Allocation) []string {
	names := make([]string, 0, len(allocs))
	for _, a := range allocs {
		names = append(names, a.Name)
	}
	return names
}

type resultExpectation struct {
	createDeployment  *structs.Deployment
	deploymentUpdates []*structs.DeploymentStatusUpdate
	place             int
	destructive       int
	inplace           int
	attributeUpdates  int
	stop              int
	desiredTGUpdates  map[string]*structs.DesiredUpdates
}

func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) {
	t.Helper()
	assert := assert.New(t)

	if exp.createDeployment != nil && r.deployment == nil {
		t.Errorf("Expected a created deployment; got none")
	} else if exp.createDeployment == nil && r.deployment != nil {
		t.Errorf("Expected no created deployment; got %#v", r.deployment)
	} else if exp.createDeployment != nil && r.deployment != nil {
		// Clear the deployment ID
		r.deployment.ID, exp.createDeployment.ID = "", ""
		if !reflect.DeepEqual(r.deployment, exp.createDeployment) {
			t.Errorf("Unexpected created deployment; got\n%#v\nwant\n%#v\nDiff: %v",
				r.deployment, exp.createDeployment, pretty.Diff(r.deployment, exp.createDeployment))
		}
	}

	assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates")
	assert.Len(r.place, exp.place, "Expected Placements")
	assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive")
	assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates")
	assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates")
	assert.Len(r.stop, exp.stop, "Expected Stops")
	assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations")
}

// Tests the reconciler properly handles placements for a job that has no
// existing allocations
func TestReconciler_Place_NoExisting(t *testing.T) {
	job := mock.Job()
	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles placements for a job that has some
// existing allocations
func TestReconciler_Place_Existing(t *testing.T) {
	job := mock.Job()

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down
func TestReconciler_ScaleDown_Partial(t *testing.T) {
	// Has desired 10
	job := mock.Job()

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
				Stop:   10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired
func TestReconciler_ScaleDown_Zero(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles stopping allocations for a job that has
// scaled down to zero desired where allocs have duplicate names
func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) {
	// Set desired 0
	job := mock.Job()
	job.TaskGroups[0].Count = 0

	// Create 20 existing allocations
	var allocs []*structs.Allocation
	var expectedStopped []int
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		allocs = append(allocs, alloc)
		expectedStopped = append(expectedStopped, i%2)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              20,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop: 20,
			},
		},
	})

	assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles inplace upgrading allocations
func TestReconciler_Inplace(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling up
func TestReconciler_Inplace_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         5,
				InPlaceUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles inplace upgrading allocations while
// scaling down
func TestReconciler_Inplace_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          5,
				InPlaceUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler properly handles destructive upgrading allocations
func TestReconciler_Destructive(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling up
func TestReconciler_Destructive_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             5,
		destructive:       10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             5,
				DestructiveUpdate: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles destructive upgrading allocations while
// scaling down
func TestReconciler_Destructive_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       5,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:              5,
				DestructiveUpdate: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler properly handles lost nodes with allocations
func TestReconciler_LostNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling up
func TestReconciler_LostNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  7,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles lost nodes with allocations while
// scaling down
func TestReconciler_LostNode_ScaleDown(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		n.Status = structs.NodeStatusDown
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop))
}
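// Note (added for clarity; not in the original file): the lost-node tests
// above and the drain tests below exercise two different taint paths.
// Allocations on down nodes are stopped and replaced without a Migrate count,
// while draining nodes set DesiredTransition.Migrate, so the reconciler
// reports those allocs under Migrate and their placements carry previousAlloc
// without the reschedule flag set.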
// Tests the reconciler properly handles draining nodes with allocations
func TestReconciler_DrainNode(t *testing.T) {
	job := mock.Job()

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling up
func TestReconciler_DrainNode_ScaleUp(t *testing.T) {
	// Set desired 15
	job := mock.Job()
	job.TaskGroups[0].Count = 15

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:   5,
				Migrate: 2,
				Ignore:  8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles draining nodes with allocations while
// scaling down
func TestReconciler_DrainNode_ScaleDown(t *testing.T) {
	// Set desired 8
	job := mock.Job()
	job.TaskGroups[0].Count = 8

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[i].NodeID
		allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		n.Drain = true
		tainted[n.ID] = n
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Migrate: 1,
				Stop:    2,
				Ignore:  7,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	// These should not have the reschedule field set
	assertPlacementsAreRescheduled(t, 0, r.place)
}

// Tests the reconciler properly handles a task group being removed
func TestReconciler_RemovedTG(t *testing.T) {
	job := mock.Job()

	// Create 10 allocations for a tg that no longer exists
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	oldName := job.TaskGroups[0].Name
	newName := "different"
	job.TaskGroups[0].Name = newName

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		inplace:           0,
		stop:              10,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			oldName: {
				Stop: 10,
			},
			newName: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles a job in stopped states
func TestReconciler_JobStopped(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
	}{
		{
			name:      "stopped job",
			job:       job,
			jobID:     job.ID,
			taskGroup: job.TaskGroups[0].Name,
		},
		{
			name:      "nil job",
			job:       nil,
			jobID:     "foo",
			taskGroup: "bar",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler properly handles jobs with multiple task groups
func TestReconciler_MultiTG(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)

	// Create 2 existing allocations for the first tg
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             18,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  8,
				Ignore: 2,
			},
			tg2.Name: {
				Place: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place))
}

// Tests the reconciler properly handles jobs with multiple task groups with
// only one having an update stanza and a deployment already being created
func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) {
	job := mock.Job()
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "foo"
	job.TaskGroups = append(job.TaskGroups, tg2)
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create all the allocs
	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		for j := 0; j < 10; j++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j))
			alloc.TaskGroup = job.TaskGroups[i].Name
			allocs = append(allocs, alloc)
		}
	}

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			tg2.Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests delayed rescheduling of failed batch allocations
func TestReconciler_RescheduleLater_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}

	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Two reschedule attempts were already made, one more can be made at a future time
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests delayed rescheduling of failed batch allocations and batching of allocs
// with fail times that are close together
func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) {
	require := require.New(t)

	// Set desired 10
	job := mock.Job()
	job.TaskGroups[0].Count = 10
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name

	// Create 10 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark 5 as failed with fail times very close together
	for i := 0; i < 5; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(time.Duration(50*i) * time.Millisecond)}}
	}

	// Mark two more as failed several seconds later
	for i := 5; i < 7; i++ {
		allocs[i].ClientStatus = structs.AllocClientStatusFailed
		allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(10 * time.Second)}}
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Verify that two follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(2, len(evals))

	// Verify expected WaitUntil values for both batched evals
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)
	secondBatchDuration := delayDur + 10*time.Second
	require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil)

	// Verify that the failed allocs are annotated for delayed rescheduling but
	// nothing is placed yet
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		attributeUpdates:  7,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         0,
				InPlaceUpdate: 0,
				Ignore:        10,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	for _, alloc := range r.attributeUpdates {
		if allocNameToIndex(alloc.Name) < 5 {
			require.Equal(evals[0].ID, alloc.FollowupEvalID)
		} else if allocNameToIndex(alloc.Name) < 7 {
			require.Equal(evals[1].ID, alloc.FollowupEvalID)
		} else {
			t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name)
		}
	}
}
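// Note (added for clarity; not in the original file): with DelayFunction
// "constant" and Delay = 15s, a failed alloc becomes eligible for rescheduling
// at FinishedAt + 15s. The test above shows that eligibility times which land
// close together are coalesced into a single follow-up evaluation: the five
// failures ~50ms apart share one eval (WaitUntil = now + 15s), while the two
// failures 10s later get a second eval (WaitUntil = now + 25s).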
// Tests rescheduling failed batch allocations
func TestReconciler_RescheduleNow_Batch(t *testing.T) {
	require := require.New(t)
	// Set desired 4
	job := mock.Job()
	job.TaskGroups[0].Count = 4
	now := time.Now()
	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"}
	tgName := job.TaskGroups[0].Name
	// Create 6 existing allocations - 2 running, 1 complete and 3 failed
	var allocs []*structs.Allocation
	for i := 0; i < 6; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark 3 as failed with restart tracking info
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].NextAllocation = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[2].FollowupEvalID = uuid.Generate()
	allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[0].ID,
			PrevNodeID:  uuid.Generate(),
		},
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: allocs[1].ID,
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Mark one as complete
	allocs[5].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Two reschedule attempts were made, one more can be made now
	// Alloc 5 should not be replaced because it is terminal
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
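// Note (added for clarity; not in the original file): the RescheduleNow tests
// differ from the RescheduleLater tests only in timing. When a failed alloc's
// FinishedAt plus the policy Delay is already in the past (or the evaluation
// being processed matches the alloc's FollowupEvalID), the replacement is
// placed immediately and flagged as a reschedule; otherwise the reconciler
// emits a follow-up eval with a WaitUntil time and annotates the alloc with
// that eval's ID.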
// Tests delayed rescheduling of failed service allocations with desired state stop
func TestReconciler_RescheduleLater_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate())
	r := reconciler.Compute()

	// Should place a new placement and create a follow up eval for the delayed reschedule
	// Verify that the follow up eval has the expected waitUntil time
	evals := r.desiredFollowupEvals[tgName]
	require.NotNil(evals)
	require.Equal(1, len(evals))
	require.Equal(now.Add(delayDur), evals[0].WaitUntil)

	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		attributeUpdates:  1,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates))

	// Verify that the followup evalID field is set correctly
	var annotated *structs.Allocation
	for _, a := range r.attributeUpdates {
		annotated = a
	}
	require.Equal(evals[0].ID, annotated.FollowupEvalID)
}

// Tests service allocations with client status complete
func TestReconciler_Service_ClientStatusComplete(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    delayDur,
		MaxDelay: 1 * time.Hour,
	}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DesiredStatus = structs.AllocDesiredStatusRun
	}

	// Mark one as client status complete
	allocs[4].ClientStatus = structs.AllocClientStatusComplete

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place a new placement for the alloc that was marked complete
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))
}

// Tests service job placement with desired stop and client status complete
func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	delayDur := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts: 1,
		Interval: 24 * time.Hour,
		Delay:    delayDur,
		MaxDelay: 1 * time.Hour,
	}

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DesiredStatus = structs.AllocDesiredStatusRun
	}

	// Mark one as failed but with desired status stop
	// Should not trigger rescheduling logic but should trigger a placement
	allocs[4].ClientStatus = structs.AllocClientStatusFailed
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place a new placement for the alloc that was marked stopped
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:         1,
				InPlaceUpdate: 0,
				Ignore:        4,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place))

	// Should not have any follow up evals created
	require := require.New(t)
	require.Equal(0, len(r.desiredFollowupEvals))
}

// Tests rescheduling failed service allocations with desired state stop
func TestReconciler_RescheduleNow_Service(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc and one replacement for terminal alloc were placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Ignore: 3,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
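// Note (added for clarity; not in the original file): in the test below the
// failed alloc finished 4s ago under a 5s delay, so strictly it is not yet
// eligible (FinishedAt + Delay = now + 1s). The reconciler tolerates a small
// window for clock drift between servers, so the alloc is rescheduled
// immediately instead of getting a follow-up eval.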
// Tests rescheduling failed service allocations when there's clock drift (up to a second)
func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Set fail time to 4 seconds ago which falls within the reschedule window
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-4 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc was placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift
func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark two as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	// Set fail time to 5 seconds ago and eval ID
	evalID := uuid.Generate()
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-5 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].FollowupEvalID = evalID

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID)
	reconciler.now = now.Add(-30 * time.Second)
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc was placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests rescheduling failed service allocations when there are canaries
func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = canaryUpdate

	job2 := job.Copy()
	job2.Version++

	d := structs.NewDeployment(job2)
	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	s := &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark three as failed
	allocs[0].ClientStatus = structs.AllocClientStatusFailed

	// Mark one of them as already rescheduled once
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Mark a third one as failed
	allocs[4].ClientStatus = structs.AllocClientStatusFailed

	// Create 2 canary allocations
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc and one replacement for the terminal
	// alloc were placed
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Ignore: 5,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	assertPlacementsAreRescheduled(t, 2, r.place)
}

// Tests rescheduling failed canary service allocations
func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Delay:         5 * time.Second,
		DelayFunction: "constant",
		MaxDelay:      1 * time.Hour,
		Unlimited:     true,
	}
	job.TaskGroups[0].Update = canaryUpdate

	job2 := job.Copy()
	job2.Version++

	d := structs.NewDeployment(job2)
	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	s := &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Create 2 running canary allocations (not yet marked healthy)
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	// Mark the canaries as failed
	allocs[5].ClientStatus = structs.AllocClientStatusFailed
	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Mark one of them as already rescheduled once
	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}

	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[6].ClientStatus = structs.AllocClientStatusFailed
	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Create 4 unhealthy canary allocations that have already been replaced
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that both failed canaries were rescheduled
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  2,
				Ignore: 9,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 2, r.place)
	assertPlacementsAreRescheduled(t, 2, r.place)
}

// Tests rescheduling failed canary service allocations when one has reached its
// reschedule limit
func TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) {
	require := require.New(t)

	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = canaryUpdate

	job2 := job.Copy()
	job2.Version++

	d := structs.NewDeployment(job2)
	d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	s := &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Create 2 running canary allocations (not yet marked healthy)
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	// Mark the canaries as failed
	allocs[5].ClientStatus = structs.AllocClientStatusFailed
	allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Mark one of them as already rescheduled once
	allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
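		// With Attempts: 1 in the reschedule policy, this single prior event
		// exhausts the canary's reschedule budget.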
	}}

	allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	allocs[6].ClientStatus = structs.AllocClientStatusFailed
	allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true)

	// Create 4 unhealthy canary allocations that have already been replaced
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2))
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Canary:  true,
			Healthy: helper.BoolToPtr(false),
		}
		s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID)
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "")
	reconciler.now = now
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that only the canary that hasn't hit its reschedule limit was
	// replaced
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 10,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}

// Tests that failed service allocations that were already rescheduled won't be
// rescheduled again
func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) {
	// Set desired 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5

	// Set up reschedule policy
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour}

	// Create 7 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 7; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}
	// Mark two as failed and rescheduled
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].ID = allocs[1].ID
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID: uuid.Generate(),
			PrevNodeID:  uuid.Generate(),
		},
	}}
	allocs[1].NextAllocation = allocs[2].ID

	// Mark one as desired state stop
	allocs[4].DesiredStatus = structs.AllocDesiredStatusStop

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Should place 1: a new placement to make up the desired count of 5;
	// failed allocs that were already rescheduled are not rescheduled again
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Name index 0 is used for the replacement because it's the lowest free index
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
}

// Tests the reconciler cancels an old deployment when the job is being stopped
func TestReconciler_CancelDeployment_JobStop(t *testing.T) {
	job := mock.Job()
	job.Stop = true

	running := structs.NewDeployment(job)
	failed := structs.NewDeployment(job)
	failed.Status = structs.DeploymentStatusFailed

	cases := []struct {
		name             string
		job              *structs.Job
		jobID, taskGroup string
		deployment       *structs.Deployment
		cancel           bool
	}{
		{
			name:       "stopped job, running deployment",
			job:        job,
			jobID:      job.ID,
			taskGroup:  job.TaskGroups[0].Name,
			deployment: running,
			cancel:     true,
		},
		{
			name:       "nil job, running deployment",
			job:        nil,
			jobID:      "foo",
			taskGroup:  "bar",
			deployment: running,
			cancel:     true,
		},
		{
			name:       "stopped job, failed deployment",
			job:        job,
			jobID:      job.ID,
			taskGroup:  job.TaskGroups[0].Name,
			deployment: failed,
			cancel:     false,
		},
		{
			name:       "nil job, failed deployment",
			job:        nil,
			jobID:      "foo",
			taskGroup:  "bar",
			deployment: failed,
			cancel:     false,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = c.job
				alloc.JobID = c.jobID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i))
				alloc.TaskGroup = c.taskGroup
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "")
			r := reconciler.Compute()

			var updates []*structs.DeploymentStatusUpdate
			if c.cancel {
				updates = []*structs.DeploymentStatusUpdate{
					{
						DeploymentID:      c.deployment.ID,
						Status:            structs.DeploymentStatusCancelled,
						StatusDescription: structs.DeploymentStatusDescriptionStoppedJob,
					},
				}
			}

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: updates,
				place:             0,
				inplace:           0,
				stop:              10,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					c.taskGroup: {
						Stop: 10,
					},
				},
			})

			assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop))
		})
	}
}

// Tests the reconciler cancels an old deployment when the job is updated
func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) {
	// Create a base job
	job := mock.Job()

	// Create two deployments
	running := structs.NewDeployment(job)
	failed := structs.NewDeployment(job)
	failed.Status = structs.DeploymentStatusFailed

	// Make the job newer than the deployment
	job.Version += 10

	cases := []struct {
		name       string
		deployment *structs.Deployment
		cancel     bool
	}{
		{
			name:       "running deployment",
			deployment: running,
			cancel:     true,
		},
		{
			name:       "failed deployment",
			deployment: failed,
			cancel:     false,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create 10 allocations
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "")
			r := reconciler.Compute()

			var updates []*structs.DeploymentStatusUpdate
			if c.cancel {
				updates = []*structs.DeploymentStatusUpdate{
					{
						DeploymentID:      c.deployment.ID,
						Status:            structs.DeploymentStatusCancelled,
						StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
					},
				}
			}

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: updates,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 10,
					},
				},
			})
		})
	}
}

// Tests the reconciler creates a deployment and does a rolling upgrade with
// destructive changes
func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  d,
		deploymentUpdates: nil,
		destructive:       4,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 4,
				Ignore:            6,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests the reconciler creates a deployment for inplace updates
func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) {
	jobOld := mock.Job()
	job := jobOld.Copy()
	job.Version++
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = jobOld
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  d,
		deploymentUpdates: nil,
		place:             0,
		inplace:           10,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate: 10,
			},
		},
	})
}

// Tests the reconciler creates a deployment when the job has a newer create index
func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) {
	jobOld := mock.Job()
	job := jobOld.Copy()
	job.TaskGroups[0].Update = noCanaryUpdate
	job.CreateIndex += 100

	// Create 5 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = jobOld
		alloc.JobID = jobOld.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 5,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  d,
		deploymentUpdates: nil,
		place:             5,
		destructive:       0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				InPlaceUpdate:     0,
				Ignore:            5,
				Place:             5,
				DestructiveUpdate: 0,
			},
		},
	})
}

// Tests the reconciler doesn't create a deployment if there are no changes
func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 10 allocations from the job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 0,
				Ignore:            10,
			},
		},
	})
}

// Tests the reconciler doesn't place any more canaries when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	cases := []struct {
		name             string
		deploymentStatus string
		stop             uint64
	}{
		{
			name: "paused deployment",
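			// a paused deployment places nothing new and keeps the existing canary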
			deploymentStatus: structs.DeploymentStatusPaused,
			stop:             0,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
			stop:             1,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed some canaries
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:        false,
				DesiredCanaries: 2,
				DesiredTotal:    10,
				PlacedAllocs:    1,
			}

			// Create 10 allocations for the original job
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			// Create one canary
			canary := mock.Alloc()
			canary.Job = job
			canary.JobID = job.ID
			canary.NodeID = uuid.Generate()
			canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
			canary.TaskGroup = job.TaskGroups[0].Name
			canary.DeploymentID = d.ID
			allocs = append(allocs, canary)
			d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID}

			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
			reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              int(c.stop),
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 11 - c.stop,
						Stop:   c.stop,
					},
				},
			})
		})
	}
}

// Tests the reconciler doesn't place any more allocs when the deployment is
// paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	job.TaskGroups[0].Count = 15

	cases := []struct {
		name             string
		deploymentStatus string
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed some allocs
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:     false,
				DesiredTotal: 15,
				PlacedAllocs: 10,
			}

			// Create 10 allocations for the new job
			var allocs []*structs.Allocation
			for i := 0; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 10,
					},
				},
			})
		})
	}
}

// Tests the reconciler doesn't do any more destructive updates when the
// deployment is paused or failed
func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	cases := []struct {
		name             string
		deploymentStatus string
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
		},
		{
			name:             "failed deployment",
			deploymentStatus: structs.DeploymentStatusFailed,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed an alloc
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:     false,
				DesiredTotal: 10,
				PlacedAllocs: 1,
			}

			// Create 9 allocations for the original job
			var allocs []*structs.Allocation
			for i := 1; i < 10; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				allocs = append(allocs, alloc)
			}

			// Create one for the new job
			newAlloc := mock.Alloc()
			newAlloc.Job = job
			newAlloc.JobID = job.ID
			newAlloc.NodeID = uuid.Generate()
			newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0)
			newAlloc.TaskGroup = job.TaskGroups[0].Name
			newAlloc.DeploymentID = d.ID
			allocs = append(allocs, newAlloc)

			mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive)
			reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             0,
				inplace:           0,
				stop:              0,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Ignore: 10,
					},
				},
			})
		})
	}
}

// Tests the reconciler handles migrations correctly when a deployment is paused
// or failed
func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	cases := []struct {
		name              string
		deploymentStatus  string
		place             int
		stop              int
		ignoreAnnotation  uint64
		migrateAnnotation uint64
		stopAnnotation    uint64
	}{
		{
			name:             "paused deployment",
			deploymentStatus: structs.DeploymentStatusPaused,
			place:            0,
			stop:             3,
			ignoreAnnotation: 5,
			stopAnnotation:   3,
		},
		{
			name:              "failed deployment",
			deploymentStatus:  structs.DeploymentStatusFailed,
			place:             0,
			stop:              3,
			ignoreAnnotation:  5,
			migrateAnnotation: 0,
			stopAnnotation:    3,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Create a deployment that is paused/failed and has placed some allocs
			d := structs.NewDeployment(job)
			d.Status = c.deploymentStatus
			d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
				Promoted:     false,
				DesiredTotal: 10,
				PlacedAllocs: 8,
			}

			// Create 8 allocations in the deployment
			var allocs []*structs.Allocation
			for i := 0; i < 8; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = uuid.Generate()
				alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
				alloc.TaskGroup = job.TaskGroups[0].Name
				alloc.DeploymentID = d.ID
				allocs = append(allocs, alloc)
			}

			// Build a map of tainted (draining) nodes
			tainted := make(map[string]*structs.Node, 3)
			for i := 0; i < 3; i++ {
				n := mock.Node()
				n.ID = allocs[i].NodeID
				allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true)
				n.Drain = true
				tainted[n.ID] = n
			}

			reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, tainted, "")
			r := reconciler.Compute()

			// Assert the correct results
			assertResults(t, r, &resultExpectation{
				createDeployment:  nil,
				deploymentUpdates: nil,
				place:             c.place,
				inplace:           0,
				stop:              c.stop,
				desiredTGUpdates: map[string]*structs.DesiredUpdates{
					job.TaskGroups[0].Name: {
						Migrate: c.migrateAnnotation,
						Ignore:  c.ignoreAnnotation,
						Stop:    c.stopAnnotation,
					},
				},
			})
		})
	}
}

// Tests the reconciler handles migrating a canary correctly on a draining node
func TestReconciler_DrainNode_Canary(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create a deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create two canaries for the new job
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 2; i++ {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		canary.DeploymentID = d.ID
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		allocs = append(allocs, canary)
		handled[canary.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes that contains the last canary
	tainted := make(map[string]*structs.Node, 1)
	n := mock.Node()
	n.ID = allocs[11].NodeID
	allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true)
	n.Drain = true
	tainted[n.ID] = n

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              1,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 1,
				Ignore: 11,
			},
		},
	})
	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
}

// Tests the reconciler handles migrating a canary correctly on a lost node
func TestReconciler_LostNode_Canary(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create a deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create two canaries for the new job
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 2; i++ {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		canary.DeploymentID = d.ID
		allocs = append(allocs, canary)
		handled[canary.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes that contains the last canary
	tainted := make(map[string]*structs.Node, 1)
	n := mock.Node()
	n.ID = allocs[11].NodeID
	n.Status = structs.NodeStatusDown
	tainted[n.ID] = n

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              1,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 1,
				Ignore: 11,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place))
}

// Tests the reconciler handles stopping canaries from older deployments
func TestReconciler_StopOldCanaries(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create an old deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Update the job
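	// (bump the version so the deployment above belongs to an older version of the job)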
	job.Version += 10

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create canaries
	for i := 0; i < 2; i++ {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		canary.DeploymentID = d.ID
		allocs = append(allocs, canary)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment: newD,
		deploymentUpdates: []*structs.DeploymentStatusUpdate{
			{
				DeploymentID:      d.ID,
				Status:            structs.DeploymentStatusCancelled,
				StatusDescription: structs.DeploymentStatusDescriptionNewerJob,
			},
		},
		place:   2,
		inplace: 0,
		stop:    2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Stop:   2,
				Ignore: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler creates new canaries when the job changes
func TestReconciler_NewCanaries(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  newD,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Ignore: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler creates new canaries when the job changes and the
// canary count is greater than the task group count
func TestReconciler_NewCanaries_CountGreater(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Update = canaryUpdate.Copy()
	job.TaskGroups[0].Update.Canary = 7

	// Create 3 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 3; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	state := &structs.DeploymentState{
		DesiredCanaries: 7,
		DesiredTotal:    3,
	}
	newD.TaskGroups[job.TaskGroups[0].Name] = state

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  newD,
		deploymentUpdates: nil,
		place:             7,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 7,
				Ignore: 3,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place))
}

// Tests the reconciler creates new canaries when the job changes for multiple
// task groups
func TestReconciler_NewCanaries_MultiTG(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate
	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
	job.TaskGroups[0].Name = "tg2"

	// Create 10 allocations from the old job for each tg
	var allocs []*structs.Allocation
	for j := 0; j < 2; j++ {
		for i := 0; i < 10; i++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i))
			alloc.TaskGroup = job.TaskGroups[j].Name
			allocs = append(allocs, alloc)
		}
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	state := &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    10,
	}
	newD.TaskGroups[job.TaskGroups[0].Name] = state
	newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  newD,
		deploymentUpdates: nil,
		place:             4,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Ignore: 10,
			},
			job.TaskGroups[1].Name: {
				Canary: 2,
				Ignore: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler creates new canaries when the job changes and scales up
func TestReconciler_NewCanaries_ScaleUp(t *testing.T) {
	// Scale the job up to 15
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate
	job.TaskGroups[0].Count = 15

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    15,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  newD,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Ignore: 10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
}

// Tests the reconciler creates new canaries when the job changes and scales
// down
func TestReconciler_NewCanaries_ScaleDown(t *testing.T) {
	// Scale the job down to 5
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate
	job.TaskGroups[0].Count = 5

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	newD := structs.NewDeployment(job)
	newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
	newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredCanaries: 2,
		DesiredTotal:    5,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  newD,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              5,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Stop:   5,
				Ignore: 5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop))
}

// Tests the reconciler handles filling the names of partially placed canaries
func TestReconciler_NewCanaries_FillNames(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = &structs.UpdateStrategy{
		Canary:          4,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
	}

	// Create an existing deployment that has placed some canaries
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 4,
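		// only 2 of the 4 desired canaries have been placed so far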
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create canaries but pick names at the ends
	for i := 0; i < 4; i += 3 {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		canary.DeploymentID = d.ID
		allocs = append(allocs, canary)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             2,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Canary: 2,
				Ignore: 12,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place))
}

// Tests the reconciler handles canary promotion by unblocking max_parallel
func TestReconciler_PromoteCanaries_Unblock(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	// Create an existing deployment that has placed some canaries and mark them
	// promoted
	d := structs.NewDeployment(job)
	s := &structs.DeploymentState{
		Promoted:        true,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the canaries
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 2; i++ {
		// Create one canary
		canary := mock.Alloc()
		canary.Job = job
		canary.JobID = job.ID
		canary.NodeID = uuid.Generate()
		canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		canary.TaskGroup = job.TaskGroups[0].Name
		s.PlacedCanaries = append(s.PlacedCanaries, canary.ID)
		canary.DeploymentID = d.ID
		canary.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, canary)
		handled[canary.ID] = allocUpdateFnIgnore
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		destructive:       2,
		stop:              2,
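		// promotion stops the two old allocs the canaries replaced and frees
		// max_parallel for two more destructive updates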
desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3524 job.TaskGroups[0].Name: { 3525 Stop: 2, 3526 DestructiveUpdate: 2, 3527 Ignore: 8, 3528 }, 3529 }, 3530 }) 3531 3532 assertNoCanariesStopped(t, d, r.stop) 3533 assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) 3534 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3535 } 3536 3537 // Tests the reconciler handles canary promotion when the canary count equals 3538 // the total correctly 3539 func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { 3540 job := mock.Job() 3541 job.TaskGroups[0].Update = canaryUpdate 3542 job.TaskGroups[0].Count = 2 3543 3544 // Create an existing deployment that has placed some canaries and mark them 3545 // promoted 3546 d := structs.NewDeployment(job) 3547 s := &structs.DeploymentState{ 3548 Promoted: true, 3549 DesiredTotal: 2, 3550 DesiredCanaries: 2, 3551 PlacedAllocs: 2, 3552 HealthyAllocs: 2, 3553 } 3554 d.TaskGroups[job.TaskGroups[0].Name] = s 3555 3556 // Create 2 allocations from the old job 3557 var allocs []*structs.Allocation 3558 for i := 0; i < 2; i++ { 3559 alloc := mock.Alloc() 3560 alloc.Job = job 3561 alloc.JobID = job.ID 3562 alloc.NodeID = uuid.Generate() 3563 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3564 alloc.TaskGroup = job.TaskGroups[0].Name 3565 allocs = append(allocs, alloc) 3566 } 3567 3568 // Create the canaries 3569 handled := make(map[string]allocUpdateType) 3570 for i := 0; i < 2; i++ { 3571 // Create one canary 3572 canary := mock.Alloc() 3573 canary.Job = job 3574 canary.JobID = job.ID 3575 canary.NodeID = uuid.Generate() 3576 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3577 canary.TaskGroup = job.TaskGroups[0].Name 3578 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3579 canary.DeploymentID = d.ID 3580 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3581 Healthy: helper.BoolToPtr(true), 3582 } 3583 allocs = append(allocs, canary) 3584 handled[canary.ID] = allocUpdateFnIgnore 3585 } 3586 3587 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3588 reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3589 r := reconciler.Compute() 3590 3591 updates := []*structs.DeploymentStatusUpdate{ 3592 { 3593 DeploymentID: d.ID, 3594 Status: structs.DeploymentStatusSuccessful, 3595 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3596 }, 3597 } 3598 3599 // Assert the correct results 3600 assertResults(t, r, &resultExpectation{ 3601 createDeployment: nil, 3602 deploymentUpdates: updates, 3603 place: 0, 3604 inplace: 0, 3605 stop: 2, 3606 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3607 job.TaskGroups[0].Name: { 3608 Stop: 2, 3609 Ignore: 2, 3610 }, 3611 }, 3612 }) 3613 3614 assertNoCanariesStopped(t, d, r.stop) 3615 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3616 } 3617 3618 // Tests the reconciler checks the health of placed allocs to determine the 3619 // limit 3620 func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { 3621 job := mock.Job() 3622 job.TaskGroups[0].Update = noCanaryUpdate 3623 3624 cases := []struct { 3625 healthy int 3626 }{ 3627 { 3628 healthy: 0, 3629 }, 3630 { 3631 healthy: 1, 3632 }, 3633 { 3634 healthy: 2, 3635 }, 3636 { 3637 healthy: 3, 3638 }, 3639 { 3640 healthy: 4, 3641 }, 3642 } 3643 3644 for _, c := range cases { 3645 t.Run(fmt.Sprintf("%d healthy", c.healthy), 
func(t *testing.T) { 3646 // Create an existing deployment that has placed some canaries and mark them 3647 // promoted 3648 d := structs.NewDeployment(job) 3649 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3650 Promoted: true, 3651 DesiredTotal: 10, 3652 PlacedAllocs: 4, 3653 } 3654 3655 // Create 6 allocations from the old job 3656 var allocs []*structs.Allocation 3657 for i := 4; i < 10; i++ { 3658 alloc := mock.Alloc() 3659 alloc.Job = job 3660 alloc.JobID = job.ID 3661 alloc.NodeID = uuid.Generate() 3662 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3663 alloc.TaskGroup = job.TaskGroups[0].Name 3664 allocs = append(allocs, alloc) 3665 } 3666 3667 // Create the new allocs 3668 handled := make(map[string]allocUpdateType) 3669 for i := 0; i < 4; i++ { 3670 new := mock.Alloc() 3671 new.Job = job 3672 new.JobID = job.ID 3673 new.NodeID = uuid.Generate() 3674 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3675 new.TaskGroup = job.TaskGroups[0].Name 3676 new.DeploymentID = d.ID 3677 if i < c.healthy { 3678 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3679 Healthy: helper.BoolToPtr(true), 3680 } 3681 } 3682 allocs = append(allocs, new) 3683 handled[new.ID] = allocUpdateFnIgnore 3684 } 3685 3686 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3687 reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3688 r := reconciler.Compute() 3689 3690 // Assert the correct results 3691 assertResults(t, r, &resultExpectation{ 3692 createDeployment: nil, 3693 deploymentUpdates: nil, 3694 destructive: c.healthy, 3695 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3696 job.TaskGroups[0].Name: { 3697 DestructiveUpdate: uint64(c.healthy), 3698 Ignore: uint64(10 - c.healthy), 3699 }, 3700 }, 3701 }) 3702 3703 if c.healthy != 0 { 3704 assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) 3705 } 3706 }) 3707 } 3708 } 3709 3710 // Tests the reconciler handles an alloc on a tainted node during a rolling 3711 // update 3712 func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 3713 job := mock.Job() 3714 job.TaskGroups[0].Update = noCanaryUpdate 3715 3716 // Create an existing deployment that has some placed allocs 3717 d := structs.NewDeployment(job) 3718 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3719 Promoted: true, 3720 DesiredTotal: 10, 3721 PlacedAllocs: 7, 3722 } 3723 3724 // Create 2 allocations from the old job 3725 var allocs []*structs.Allocation 3726 for i := 8; i < 10; i++ { 3727 alloc := mock.Alloc() 3728 alloc.Job = job 3729 alloc.JobID = job.ID 3730 alloc.NodeID = uuid.Generate() 3731 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3732 alloc.TaskGroup = job.TaskGroups[0].Name 3733 allocs = append(allocs, alloc) 3734 } 3735 3736 // Create the healthy replacements 3737 handled := make(map[string]allocUpdateType) 3738 for i := 0; i < 8; i++ { 3739 new := mock.Alloc() 3740 new.Job = job 3741 new.JobID = job.ID 3742 new.NodeID = uuid.Generate() 3743 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3744 new.TaskGroup = job.TaskGroups[0].Name 3745 new.DeploymentID = d.ID 3746 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3747 Healthy: helper.BoolToPtr(true), 3748 } 3749 allocs = append(allocs, new) 3750 handled[new.ID] = allocUpdateFnIgnore 3751 } 3752 3753 // Build a map of tainted nodes 3754 tainted := 

// Tests the reconciler handles an alloc on a tainted node during a rolling
// update
func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create an existing deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 10,
		PlacedAllocs: 7,
	}

	// Create 2 allocations from the old job
	var allocs []*structs.Allocation
	for i := 8; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the healthy replacements
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 8; i++ {
		new := mock.Alloc()
		new.Job = job
		new.JobID = job.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = d.ID
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, new)
		handled[new.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 3)
	for i := 0; i < 3; i++ {
		n := mock.Node()
		n.ID = allocs[2+i].NodeID
		if i == 0 {
			n.Status = structs.NodeStatusDown
		} else {
			n.Drain = true
			allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		}
		tainted[n.ID] = n
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             3,
		destructive:       2,
		stop:              3,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             1, // Place the lost
				Stop:              1, // Stop the lost
				Migrate:           2, // Migrate the tainted
				DestructiveUpdate: 2,
				Ignore:            5,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate))
	assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop))
}
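
// A hypothetical condensation of the inline taint-setup loops above, shown
// only to make the two taint flavors explicit: a node taints its allocs
// either by being down (the alloc is lost and replaced) or by draining (the
// alloc is migrated, provided its DesiredTransition.Migrate flag is set).
func taintedNodesSketch(downNodeID, drainNodeID string) map[string]*structs.Node {
	down := mock.Node()
	down.ID = downNodeID
	down.Status = structs.NodeStatusDown

	draining := mock.Node()
	draining.ID = drainNodeID
	draining.Drain = true

	return map[string]*structs.Node{
		down.ID:     down,
		draining.ID: draining,
	}
}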

// Tests that on a failed deployment the reconciler only replaces lost
// allocations
func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 10,
		PlacedAllocs: 4,
	}

	// Create 6 allocations from the old job
	var allocs []*structs.Allocation
	for i := 4; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the healthy replacements
	handled := make(map[string]allocUpdateType)
	for i := 0; i < 4; i++ {
		new := mock.Alloc()
		new.Job = job
		new.JobID = job.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = d.ID
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, new)
		handled[new.ID] = allocUpdateFnIgnore
	}

	// Build a map of tainted nodes
	tainted := make(map[string]*structs.Node, 2)
	for i := 0; i < 2; i++ {
		n := mock.Node()
		n.ID = allocs[6+i].NodeID
		if i == 0 {
			n.Status = structs.NodeStatusDown
		} else {
			n.Drain = true
			allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true)
		}
		tainted[n.ID] = n
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1, // Only replace the lost node
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}

// Tests that the reconciler handles a run after a deployment has completed
// successfully
func TestReconciler_CompleteDeployment(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate

	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusSuccessful
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:        true,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    10,
		HealthyAllocs:   10,
	}

	// Create 10 healthy allocations covered by the deployment
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}

// Tests that the reconciler marks a deployment as complete once there is
// nothing left to place even if there are failed allocations that are part of
// the deployment.
func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal:  10,
		PlacedAllocs:  20,
		HealthyAllocs: 10,
	}

	// Create 10 healthy allocs and 10 allocs that are failed
	var allocs []*structs.Allocation
	for i := 0; i < 20; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{}
		if i < 10 {
			alloc.ClientStatus = structs.AllocClientStatusRunning
			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true)
		} else {
			alloc.DesiredStatus = structs.AllocDesiredStatusStop
			alloc.ClientStatus = structs.AllocClientStatusFailed
			alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false)
		}

		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}

// Test that a failed deployment cancels non-promoted canaries
func TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) {
	// Create a job with two task groups
	job := mock.Job()
	job.TaskGroups[0].Update = canaryUpdate
	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
	job.TaskGroups[1].Name = "two"

	// Create an existing failed deployment that has promoted one task group
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	s0 := &structs.DeploymentState{
		Promoted:        true,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    4,
	}
	s1 := &structs.DeploymentState{
		Promoted:        false,
		DesiredTotal:    10,
		DesiredCanaries: 2,
		PlacedAllocs:    2,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = s0
	d.TaskGroups[job.TaskGroups[1].Name] = s1

	// For each group, create the healthy replacements (the first two of which
	// are recorded as canaries) and fill the rest of the group with
	// allocations from the old job
	var allocs []*structs.Allocation
	handled := make(map[string]allocUpdateType)
	for _, group := range []int{0, 1} {
		replacements := 4
		state := s0
		if group == 1 {
			replacements = 2
			state = s1
		}

		// Create the healthy replacements
		for i := 0; i < replacements; i++ {
			new := mock.Alloc()
			new.Job = job
			new.JobID = job.ID
			new.NodeID = uuid.Generate()
			new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
			new.TaskGroup = job.TaskGroups[group].Name
			new.DeploymentID = d.ID
			new.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: helper.BoolToPtr(true),
			}
			allocs = append(allocs, new)
			handled[new.ID] = allocUpdateFnIgnore

			// Add the alloc to the canary list
			if i < 2 {
				state.PlacedCanaries = append(state.PlacedCanaries, new.ID)
			}
		}
		for i := replacements; i < 10; i++ {
			alloc := mock.Alloc()
			alloc.Job = job
			alloc.JobID = job.ID
			alloc.NodeID = uuid.Generate()
			alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i))
			alloc.TaskGroup = job.TaskGroups[group].Name
			allocs = append(allocs, alloc)
		}
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             0,
		inplace:           0,
		stop:              2,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
			job.TaskGroups[1].Name: {
				Stop:   2,
				Ignore: 8,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
}
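
// A hypothetical condensation of the canary bookkeeping the canary tests
// above repeat inline: a canary is an ordinary alloc tied to the deployment,
// recorded in its group's PlacedCanaries list, and (in these tests) already
// marked healthy.
func registerHealthyCanarySketch(d *structs.Deployment, state *structs.DeploymentState, canary *structs.Allocation) {
	canary.DeploymentID = d.ID
	canary.DeploymentStatus = &structs.AllocDeploymentStatus{
		Healthy: helper.BoolToPtr(true),
	}
	state.PlacedCanaries = append(state.PlacedCanaries, canary.ID)
}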

// Test that an updated job following a failed deployment creates a new
// deployment and rolls the allocations forward
func TestReconciler_FailedDeployment_NewJob(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 10,
		PlacedAllocs: 4,
	}

	// Create 6 allocations from the old job
	var allocs []*structs.Allocation
	for i := 4; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create the healthy replacements
	for i := 0; i < 4; i++ {
		new := mock.Alloc()
		new.Job = job
		new.JobID = job.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = d.ID
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, new)
	}

	// Up the job version
	jobNew := job.Copy()
	jobNew.Version += 100

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "")
	r := reconciler.Compute()

	dnew := structs.NewDeployment(jobNew)
	dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  dnew,
		deploymentUpdates: nil,
		destructive:       4,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				DestructiveUpdate: 4,
				Ignore:            6,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests that the reconciler marks a deployment as complete
func TestReconciler_MarkDeploymentComplete(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:      true,
		DesiredTotal:  10,
		PlacedAllocs:  10,
		HealthyAllocs: 10,
	}

	// Create 10 healthy allocations covered by the deployment
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 10,
			},
		},
	})
}
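
// Several tests above expect the same single status update flipping the
// deployment to successful. A hypothetical helper that builds it, shown here
// only to make the expected shape explicit:
func successfulDeploymentUpdateSketch(d *structs.Deployment) []*structs.DeploymentStatusUpdate {
	return []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}
}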

// Tests that the reconciler handles a second evaluation of a scale up, where
// the deployment was created by the first evaluation and has already placed
// some of the new allocations
func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) {
	// Scale the job up to 30
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	job.TaskGroups[0].Count = 30

	// Create a deployment that has placed 20 of the 30 desired allocs
	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 30,
		PlacedAllocs: 20,
	}

	// Create 10 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	// Create 20 from the new job
	handled := make(map[string]allocUpdateType)
	for i := 10; i < 30; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.DeploymentID = d.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
		handled[alloc.ID] = allocUpdateFnIgnore
	}

	mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive)
	reconciler := NewAllocReconciler(testlog.Logger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				// All should be ignored because nothing has been marked as
				// healthy.
				Ignore: 30,
			},
		},
	})
}

// Tests the reconciler doesn't stop allocations when doing a rolling upgrade
// where the count of the old job allocs is less than the desired count.
func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 7 allocations from the old job
	var allocs []*structs.Allocation
	for i := 0; i < 7; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	d := structs.NewDeployment(job)
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		DesiredTotal: 10,
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  d,
		deploymentUpdates: nil,
		place:             3,
		destructive:       1,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             3,
				DestructiveUpdate: 1,
				Ignore:            6,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place))
	assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate))
}

// Tests that the reconciler handles rerunning a batch job in the case that the
// allocations are from an older instance of the job.
func TestReconciler_Batch_Rerun(t *testing.T) {
	job := mock.Job()
	job.Type = structs.JobTypeBatch
	job.TaskGroups[0].Update = nil

	// Create 10 allocations from the old job and have them be complete
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.ClientStatus = structs.AllocClientStatusComplete
		alloc.DesiredStatus = structs.AllocDesiredStatusStop
		allocs = append(allocs, alloc)
	}

	// Create a copy of the job that is "new"
	job2 := job.Copy()
	job2.CreateIndex++

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             10,
		destructive:       0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:             10,
				DestructiveUpdate: 0,
				Ignore:            10,
			},
		},
	})

	assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place))
}

// Test that a failed deployment will not result in rescheduling failed
// allocations
func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate

	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Create an existing failed deployment that has some placed allocs
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusFailed
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     true,
		DesiredTotal: 5,
		PlacedAllocs: 4,
	}

	// Create 4 allocations and mark two as failed
	var allocs []*structs.Allocation
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		allocs = append(allocs, alloc)
	}

	// Make the last two failed in a way that would normally be reschedulable
	// now
	allocs[2].ClientStatus = structs.AllocClientStatusFailed
	allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {
		State:      "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second),
	}}

	allocs[3].ClientStatus = structs.AllocClientStatusFailed
	allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {
		State:      "start",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second),
	}}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that no rescheduled placements were created
	assertResults(t, r, &resultExpectation{
		place:             0,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Ignore: 2,
			},
		},
	})
}
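
// A hypothetical helper condensing the TaskStates literal the reschedule
// tests repeat: a task that started an hour ago and failed ten seconds ago,
// which is what makes an alloc look reschedulable "now".
func failedTaskStatesSketch(taskName string, now time.Time) map[string]*structs.TaskState {
	return map[string]*structs.TaskState{
		taskName: {
			State:      "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(-10 * time.Second),
		},
	}
}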

// Test that a running deployment with failed allocs will not result in
// rescheduling failed allocations unless they are marked as reschedulable.
func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Mock a running deployment with failed allocs that the deployment
	// watcher hasn't marked as failed yet
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusRunning
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 10,
		PlacedAllocs: 10,
	}

	// Create 10 failed allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.TaskStates = map[string]*structs.TaskState{tgName: {
			State:      "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(-10 * time.Second),
		}}
		allocs = append(allocs, alloc)
	}

	// Mark half of them as reschedulable
	for i := 0; i < 5; i++ {
		allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that only the allocs marked as reschedulable were placed
	assertResults(t, r, &resultExpectation{
		place:             5,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  5,
				Ignore: 5,
			},
		},
	})
}

// Test that after a failed deployment has been auto-reverted, the reconciler
// marks the new deployment as successful and ignores the stopped allocs from
// the failed version
func TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) {
	// Create a job
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Update = &structs.UpdateStrategy{
		Canary:          3,
		MaxParallel:     2,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
		Stagger:         31 * time.Second,
	}

	// Create v1 of the job
	jobv1 := job.Copy()
	jobv1.Version = 1
	jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"}

	// Create v2 of the job
	jobv2 := job.Copy()
	jobv2.Version = 2
	jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"}

	d := structs.NewDeployment(jobv2)
	state := &structs.DeploymentState{
		Promoted:      true,
		DesiredTotal:  3,
		PlacedAllocs:  3,
		HealthyAllocs: 3,
	}
	d.TaskGroups[job.TaskGroups[0].Name] = state

	// Create the healthy v2 allocs
	var allocs []*structs.Allocation
	for i := 0; i < 3; i++ {
		new := mock.Alloc()
		new.Job = jobv2
		new.JobID = job.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = d.ID
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		new.ClientStatus = structs.AllocClientStatusRunning
		allocs = append(allocs, new)
	}

	// Create the stopped, failed v1 allocs
	for i := 0; i < 3; i++ {
		new := mock.Alloc()
		new.Job = jobv1
		new.JobID = jobv1.ID
		new.NodeID = uuid.Generate()
		new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i))
		new.TaskGroup = job.TaskGroups[0].Name
		new.DeploymentID = uuid.Generate()
		new.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}
		new.DesiredStatus = structs.AllocDesiredStatusStop
		new.ClientStatus = structs.AllocClientStatusFailed
		allocs = append(allocs, new)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "")
	r := reconciler.Compute()

	updates := []*structs.DeploymentStatusUpdate{
		{
			DeploymentID:      d.ID,
			Status:            structs.DeploymentStatusSuccessful,
			StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
		},
	}

	// Assert the correct results
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: updates,
		place:             0,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Stop:          0,
				InPlaceUpdate: 0,
				Ignore:        3,
			},
		},
	})
}

// Test that a successful deployment with failed allocs will result in
// rescheduling failed allocations
func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) {
	job := mock.Job()
	job.TaskGroups[0].Update = noCanaryUpdate
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	// Mock a successful deployment whose allocs have since failed
	d := structs.NewDeployment(job)
	d.Status = structs.DeploymentStatusSuccessful
	d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{
		Promoted:     false,
		DesiredTotal: 10,
		PlacedAllocs: 10,
	}

	// Create 10 failed allocations
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		alloc.TaskGroup = job.TaskGroups[0].Name
		alloc.DeploymentID = d.ID
		alloc.ClientStatus = structs.AllocClientStatusFailed
		alloc.TaskStates = map[string]*structs.TaskState{tgName: {
			State:      "start",
			StartedAt:  now.Add(-1 * time.Hour),
			FinishedAt: now.Add(-10 * time.Second),
		}}
		allocs = append(allocs, alloc)
	}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "")
	r := reconciler.Compute()

	// Assert that rescheduled placements were created
	assertResults(t, r, &resultExpectation{
		place:             10,
		createDeployment:  nil,
		deploymentUpdates: nil,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  10,
				Ignore: 0,
			},
		},
	})
	assertPlaceResultsHavePreviousAllocs(t, 10, r.place)
}

// Tests force rescheduling a failed alloc that is past its reschedule limit
func TestReconciler_ForceReschedule_Service(t *testing.T) {
	require := require.New(t)

	// Set desired count to 5
	job := mock.Job()
	job.TaskGroups[0].Count = 5
	tgName := job.TaskGroups[0].Name

	// Set up reschedule policy and update stanza
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "",
		MaxDelay:      1 * time.Hour,
		Unlimited:     false,
	}
	job.TaskGroups[0].Update = noCanaryUpdate

	// Create 5 existing allocations
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = uuid.Generate()
		alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i))
		allocs = append(allocs, alloc)
		alloc.ClientStatus = structs.AllocClientStatusRunning
	}

	// Mark one as failed and past its reschedule limit so it is not eligible
	// to reschedule
	allocs[0].ClientStatus = structs.AllocClientStatusFailed
	allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{
		{
			RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
			PrevAllocID:    uuid.Generate(),
			PrevNodeID:     uuid.Generate(),
		},
	}}

	// Mark DesiredTransition ForceReschedule
	allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)}

	reconciler := NewAllocReconciler(testlog.Logger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "")
	r := reconciler.Compute()

	// Verify that no follow up evals were created
	evals := r.desiredFollowupEvals[tgName]
	require.Nil(evals)

	// Verify that one rescheduled alloc was created because of the forced
	// reschedule
	assertResults(t, r, &resultExpectation{
		createDeployment:  nil,
		deploymentUpdates: nil,
		place:             1,
		inplace:           0,
		stop:              0,
		desiredTGUpdates: map[string]*structs.DesiredUpdates{
			job.TaskGroups[0].Name: {
				Place:  1,
				Ignore: 4,
			},
		},
	})

	// Rescheduled allocs should have previous allocs
	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
	assertPlaceResultsHavePreviousAllocs(t, 1, r.place)
	assertPlacementsAreRescheduled(t, 1, r.place)
}
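
// A hypothetical illustration of the knob the test above exercises: even when
// an alloc has exhausted its reschedule policy, setting
// DesiredTransition.ForceReschedule bypasses the limit for that alloc.
func forceRescheduleSketch(alloc *structs.Allocation) {
	alloc.DesiredTransition = structs.DesiredTransition{
		ForceReschedule: helper.BoolToPtr(true),
	}
}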