github.com/smithx10/nomad@v0.9.1-rc1/scheduler/reconcile_test.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 "reflect" 6 "regexp" 7 "strconv" 8 "testing" 9 "time" 10 11 "github.com/hashicorp/nomad/helper" 12 "github.com/hashicorp/nomad/helper/testlog" 13 "github.com/hashicorp/nomad/helper/uuid" 14 "github.com/hashicorp/nomad/nomad/mock" 15 "github.com/hashicorp/nomad/nomad/structs" 16 "github.com/kr/pretty" 17 "github.com/stretchr/testify/assert" 18 "github.com/stretchr/testify/require" 19 ) 20 21 var ( 22 canaryUpdate = &structs.UpdateStrategy{ 23 Canary: 2, 24 MaxParallel: 2, 25 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 26 MinHealthyTime: 10 * time.Second, 27 HealthyDeadline: 10 * time.Minute, 28 Stagger: 31 * time.Second, 29 } 30 31 noCanaryUpdate = &structs.UpdateStrategy{ 32 MaxParallel: 4, 33 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 34 MinHealthyTime: 10 * time.Second, 35 HealthyDeadline: 10 * time.Minute, 36 Stagger: 31 * time.Second, 37 } 38 ) 39 40 func allocUpdateFnIgnore(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { 41 return true, false, nil 42 } 43 44 func allocUpdateFnDestructive(*structs.Allocation, *structs.Job, *structs.TaskGroup) (bool, bool, *structs.Allocation) { 45 return false, true, nil 46 } 47 48 func allocUpdateFnInplace(existing *structs.Allocation, _ *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { 49 // Create a shallow copy 50 newAlloc := existing.CopySkipJob() 51 newAlloc.AllocatedResources = &structs.AllocatedResources{ 52 Tasks: map[string]*structs.AllocatedTaskResources{}, 53 Shared: structs.AllocatedSharedResources{ 54 DiskMB: int64(newTG.EphemeralDisk.SizeMB), 55 }, 56 } 57 58 // Use the new task resources but keep the network from the old 59 for _, task := range newTG.Tasks { 60 networks := existing.AllocatedResources.Tasks[task.Name].Copy().Networks 61 newAlloc.AllocatedResources.Tasks[task.Name] = 
&structs.AllocatedTaskResources{ 62 Cpu: structs.AllocatedCpuResources{ 63 CpuShares: int64(task.Resources.CPU), 64 }, 65 Memory: structs.AllocatedMemoryResources{ 66 MemoryMB: int64(task.Resources.MemoryMB), 67 }, 68 Networks: networks, 69 } 70 } 71 72 return false, false, newAlloc 73 } 74 75 func allocUpdateFnMock(handled map[string]allocUpdateType, unhandled allocUpdateType) allocUpdateType { 76 return func(existing *structs.Allocation, newJob *structs.Job, newTG *structs.TaskGroup) (bool, bool, *structs.Allocation) { 77 if fn, ok := handled[existing.ID]; ok { 78 return fn(existing, newJob, newTG) 79 } 80 81 return unhandled(existing, newJob, newTG) 82 } 83 } 84 85 var ( 86 // AllocationIndexRegex is a regular expression to find the allocation index. 87 allocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 88 ) 89 90 // allocNameToIndex returns the index of the allocation. 91 func allocNameToIndex(name string) uint { 92 matches := allocationIndexRegex.FindStringSubmatch(name) 93 if len(matches) != 2 { 94 return 0 95 } 96 97 index, err := strconv.Atoi(matches[1]) 98 if err != nil { 99 return 0 100 } 101 102 return uint(index) 103 } 104 105 func assertNamesHaveIndexes(t *testing.T, indexes []int, names []string) { 106 t.Helper() 107 m := make(map[uint]int) 108 for _, i := range indexes { 109 m[uint(i)] += 1 110 } 111 112 for _, n := range names { 113 index := allocNameToIndex(n) 114 val, contained := m[index] 115 if !contained { 116 t.Fatalf("Unexpected index %d from name %s\nAll names: %v", index, n, names) 117 } 118 119 val-- 120 if val < 0 { 121 t.Fatalf("Index %d repeated too many times\nAll names: %v", index, names) 122 } 123 m[index] = val 124 } 125 126 for k, remainder := range m { 127 if remainder != 0 { 128 t.Fatalf("Index %d has %d remaining uses expected\nAll names: %v", k, remainder, names) 129 } 130 } 131 } 132 133 func assertNoCanariesStopped(t *testing.T, d *structs.Deployment, stop []allocStopResult) { 134 t.Helper() 135 canaryIndex := 
make(map[string]struct{}) 136 for _, state := range d.TaskGroups { 137 for _, c := range state.PlacedCanaries { 138 canaryIndex[c] = struct{}{} 139 } 140 } 141 142 for _, s := range stop { 143 if _, ok := canaryIndex[s.alloc.ID]; ok { 144 t.Fatalf("Stopping canary alloc %q %q", s.alloc.ID, s.alloc.Name) 145 } 146 } 147 } 148 149 func assertPlaceResultsHavePreviousAllocs(t *testing.T, numPrevious int, place []allocPlaceResult) { 150 t.Helper() 151 names := make(map[string]struct{}, numPrevious) 152 153 found := 0 154 for _, p := range place { 155 if _, ok := names[p.name]; ok { 156 t.Fatalf("Name %q already placed", p.name) 157 } 158 names[p.name] = struct{}{} 159 160 if p.previousAlloc == nil { 161 continue 162 } 163 164 if act := p.previousAlloc.Name; p.name != act { 165 t.Fatalf("Name mismatch on previous alloc; got %q; want %q", act, p.name) 166 } 167 found++ 168 } 169 if numPrevious != found { 170 t.Fatalf("wanted %d; got %d placements with previous allocs", numPrevious, found) 171 } 172 } 173 174 func assertPlacementsAreRescheduled(t *testing.T, numRescheduled int, place []allocPlaceResult) { 175 t.Helper() 176 names := make(map[string]struct{}, numRescheduled) 177 178 found := 0 179 for _, p := range place { 180 if _, ok := names[p.name]; ok { 181 t.Fatalf("Name %q already placed", p.name) 182 } 183 names[p.name] = struct{}{} 184 185 if p.previousAlloc == nil { 186 continue 187 } 188 if p.reschedule { 189 found++ 190 } 191 192 } 193 if numRescheduled != found { 194 t.Fatalf("wanted %d; got %d placements that are rescheduled", numRescheduled, found) 195 } 196 } 197 198 func intRange(pairs ...int) []int { 199 if len(pairs)%2 != 0 { 200 return nil 201 } 202 203 var r []int 204 for i := 0; i < len(pairs); i += 2 { 205 for j := pairs[i]; j <= pairs[i+1]; j++ { 206 r = append(r, j) 207 } 208 } 209 return r 210 } 211 212 func placeResultsToNames(place []allocPlaceResult) []string { 213 names := make([]string, 0, len(place)) 214 for _, p := range place { 215 names = 
append(names, p.name) 216 } 217 return names 218 } 219 220 func destructiveResultsToNames(destructive []allocDestructiveResult) []string { 221 names := make([]string, 0, len(destructive)) 222 for _, d := range destructive { 223 names = append(names, d.placeName) 224 } 225 return names 226 } 227 228 func stopResultsToNames(stop []allocStopResult) []string { 229 names := make([]string, 0, len(stop)) 230 for _, s := range stop { 231 names = append(names, s.alloc.Name) 232 } 233 return names 234 } 235 236 func attributeUpdatesToNames(attributeUpdates map[string]*structs.Allocation) []string { 237 names := make([]string, 0, len(attributeUpdates)) 238 for _, a := range attributeUpdates { 239 names = append(names, a.Name) 240 } 241 return names 242 } 243 244 func allocsToNames(allocs []*structs.Allocation) []string { 245 names := make([]string, 0, len(allocs)) 246 for _, a := range allocs { 247 names = append(names, a.Name) 248 } 249 return names 250 } 251 252 type resultExpectation struct { 253 createDeployment *structs.Deployment 254 deploymentUpdates []*structs.DeploymentStatusUpdate 255 place int 256 destructive int 257 inplace int 258 attributeUpdates int 259 stop int 260 desiredTGUpdates map[string]*structs.DesiredUpdates 261 } 262 263 func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { 264 t.Helper() 265 assert := assert.New(t) 266 267 if exp.createDeployment != nil && r.deployment == nil { 268 t.Errorf("Expect a created deployment got none") 269 } else if exp.createDeployment == nil && r.deployment != nil { 270 t.Errorf("Expect no created deployment; got %#v", r.deployment) 271 } else if exp.createDeployment != nil && r.deployment != nil { 272 // Clear the deployment ID 273 r.deployment.ID, exp.createDeployment.ID = "", "" 274 if !reflect.DeepEqual(r.deployment, exp.createDeployment) { 275 t.Errorf("Unexpected createdDeployment; got\n %#v\nwant\n%#v\nDiff: %v", 276 r.deployment, exp.createDeployment, pretty.Diff(r.deployment, 
exp.createDeployment)) 277 } 278 } 279 280 assert.EqualValues(exp.deploymentUpdates, r.deploymentUpdates, "Expected Deployment Updates") 281 assert.Len(r.place, exp.place, "Expected Placements") 282 assert.Len(r.destructiveUpdate, exp.destructive, "Expected Destructive") 283 assert.Len(r.inplaceUpdate, exp.inplace, "Expected Inplace Updates") 284 assert.Len(r.attributeUpdates, exp.attributeUpdates, "Expected Attribute Updates") 285 assert.Len(r.stop, exp.stop, "Expected Stops") 286 assert.EqualValues(exp.desiredTGUpdates, r.desiredTGUpdates, "Expected Desired TG Update Annotations") 287 } 288 289 // Tests the reconciler properly handles placements for a job that has no 290 // existing allocations 291 func TestReconciler_Place_NoExisting(t *testing.T) { 292 job := mock.Job() 293 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, nil, nil, "") 294 r := reconciler.Compute() 295 296 // Assert the correct results 297 assertResults(t, r, &resultExpectation{ 298 createDeployment: nil, 299 deploymentUpdates: nil, 300 place: 10, 301 inplace: 0, 302 stop: 0, 303 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 304 job.TaskGroups[0].Name: { 305 Place: 10, 306 }, 307 }, 308 }) 309 310 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 311 } 312 313 // Tests the reconciler properly handles placements for a job that has some 314 // existing allocations 315 func TestReconciler_Place_Existing(t *testing.T) { 316 job := mock.Job() 317 318 // Create 3 existing allocations 319 var allocs []*structs.Allocation 320 for i := 0; i < 5; i++ { 321 alloc := mock.Alloc() 322 alloc.Job = job 323 alloc.JobID = job.ID 324 alloc.NodeID = uuid.Generate() 325 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 326 allocs = append(allocs, alloc) 327 } 328 329 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 330 r := reconciler.Compute() 
331 332 // Assert the correct results 333 assertResults(t, r, &resultExpectation{ 334 createDeployment: nil, 335 deploymentUpdates: nil, 336 place: 5, 337 inplace: 0, 338 stop: 0, 339 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 340 job.TaskGroups[0].Name: { 341 Place: 5, 342 Ignore: 5, 343 }, 344 }, 345 }) 346 347 assertNamesHaveIndexes(t, intRange(5, 9), placeResultsToNames(r.place)) 348 } 349 350 // Tests the reconciler properly handles stopping allocations for a job that has 351 // scaled down 352 func TestReconciler_ScaleDown_Partial(t *testing.T) { 353 // Has desired 10 354 job := mock.Job() 355 356 // Create 20 existing allocations 357 var allocs []*structs.Allocation 358 for i := 0; i < 20; i++ { 359 alloc := mock.Alloc() 360 alloc.Job = job 361 alloc.JobID = job.ID 362 alloc.NodeID = uuid.Generate() 363 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 364 allocs = append(allocs, alloc) 365 } 366 367 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 368 r := reconciler.Compute() 369 370 // Assert the correct results 371 assertResults(t, r, &resultExpectation{ 372 createDeployment: nil, 373 deploymentUpdates: nil, 374 place: 0, 375 inplace: 0, 376 stop: 10, 377 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 378 job.TaskGroups[0].Name: { 379 Ignore: 10, 380 Stop: 10, 381 }, 382 }, 383 }) 384 385 assertNamesHaveIndexes(t, intRange(10, 19), stopResultsToNames(r.stop)) 386 } 387 388 // Tests the reconciler properly handles stopping allocations for a job that has 389 // scaled down to zero desired 390 func TestReconciler_ScaleDown_Zero(t *testing.T) { 391 // Set desired 0 392 job := mock.Job() 393 job.TaskGroups[0].Count = 0 394 395 // Create 20 existing allocations 396 var allocs []*structs.Allocation 397 for i := 0; i < 20; i++ { 398 alloc := mock.Alloc() 399 alloc.Job = job 400 alloc.JobID = job.ID 401 alloc.NodeID = uuid.Generate() 402 alloc.Name = 
structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 403 allocs = append(allocs, alloc) 404 } 405 406 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 407 r := reconciler.Compute() 408 409 // Assert the correct results 410 assertResults(t, r, &resultExpectation{ 411 createDeployment: nil, 412 deploymentUpdates: nil, 413 place: 0, 414 inplace: 0, 415 stop: 20, 416 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 417 job.TaskGroups[0].Name: { 418 Stop: 20, 419 }, 420 }, 421 }) 422 423 assertNamesHaveIndexes(t, intRange(0, 19), stopResultsToNames(r.stop)) 424 } 425 426 // Tests the reconciler properly handles stopping allocations for a job that has 427 // scaled down to zero desired where allocs have duplicate names 428 func TestReconciler_ScaleDown_Zero_DuplicateNames(t *testing.T) { 429 // Set desired 0 430 job := mock.Job() 431 job.TaskGroups[0].Count = 0 432 433 // Create 20 existing allocations 434 var allocs []*structs.Allocation 435 var expectedStopped []int 436 for i := 0; i < 20; i++ { 437 alloc := mock.Alloc() 438 alloc.Job = job 439 alloc.JobID = job.ID 440 alloc.NodeID = uuid.Generate() 441 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 442 allocs = append(allocs, alloc) 443 expectedStopped = append(expectedStopped, i%2) 444 } 445 446 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 447 r := reconciler.Compute() 448 449 // Assert the correct results 450 assertResults(t, r, &resultExpectation{ 451 createDeployment: nil, 452 deploymentUpdates: nil, 453 place: 0, 454 inplace: 0, 455 stop: 20, 456 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 457 job.TaskGroups[0].Name: { 458 Stop: 20, 459 }, 460 }, 461 }) 462 463 assertNamesHaveIndexes(t, expectedStopped, stopResultsToNames(r.stop)) 464 } 465 466 // Tests the reconciler properly handles inplace upgrading allocations 467 func 
TestReconciler_Inplace(t *testing.T) { 468 job := mock.Job() 469 470 // Create 10 existing allocations 471 var allocs []*structs.Allocation 472 for i := 0; i < 10; i++ { 473 alloc := mock.Alloc() 474 alloc.Job = job 475 alloc.JobID = job.ID 476 alloc.NodeID = uuid.Generate() 477 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 478 allocs = append(allocs, alloc) 479 } 480 481 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 482 r := reconciler.Compute() 483 484 // Assert the correct results 485 assertResults(t, r, &resultExpectation{ 486 createDeployment: nil, 487 deploymentUpdates: nil, 488 place: 0, 489 inplace: 10, 490 stop: 0, 491 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 492 job.TaskGroups[0].Name: { 493 InPlaceUpdate: 10, 494 }, 495 }, 496 }) 497 498 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 499 } 500 501 // Tests the reconciler properly handles inplace upgrading allocations while 502 // scaling up 503 func TestReconciler_Inplace_ScaleUp(t *testing.T) { 504 // Set desired 15 505 job := mock.Job() 506 job.TaskGroups[0].Count = 15 507 508 // Create 10 existing allocations 509 var allocs []*structs.Allocation 510 for i := 0; i < 10; i++ { 511 alloc := mock.Alloc() 512 alloc.Job = job 513 alloc.JobID = job.ID 514 alloc.NodeID = uuid.Generate() 515 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 516 allocs = append(allocs, alloc) 517 } 518 519 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 520 r := reconciler.Compute() 521 522 // Assert the correct results 523 assertResults(t, r, &resultExpectation{ 524 createDeployment: nil, 525 deploymentUpdates: nil, 526 place: 5, 527 inplace: 10, 528 stop: 0, 529 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 530 job.TaskGroups[0].Name: { 531 Place: 5, 532 InPlaceUpdate: 10, 533 }, 534 }, 535 
}) 536 537 assertNamesHaveIndexes(t, intRange(0, 9), allocsToNames(r.inplaceUpdate)) 538 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 539 } 540 541 // Tests the reconciler properly handles inplace upgrading allocations while 542 // scaling down 543 func TestReconciler_Inplace_ScaleDown(t *testing.T) { 544 // Set desired 5 545 job := mock.Job() 546 job.TaskGroups[0].Count = 5 547 548 // Create 10 existing allocations 549 var allocs []*structs.Allocation 550 for i := 0; i < 10; i++ { 551 alloc := mock.Alloc() 552 alloc.Job = job 553 alloc.JobID = job.ID 554 alloc.NodeID = uuid.Generate() 555 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 556 allocs = append(allocs, alloc) 557 } 558 559 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 560 r := reconciler.Compute() 561 562 // Assert the correct results 563 assertResults(t, r, &resultExpectation{ 564 createDeployment: nil, 565 deploymentUpdates: nil, 566 place: 0, 567 inplace: 5, 568 stop: 5, 569 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 570 job.TaskGroups[0].Name: { 571 Stop: 5, 572 InPlaceUpdate: 5, 573 }, 574 }, 575 }) 576 577 assertNamesHaveIndexes(t, intRange(0, 4), allocsToNames(r.inplaceUpdate)) 578 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 579 } 580 581 // Tests the reconciler properly handles destructive upgrading allocations 582 func TestReconciler_Destructive(t *testing.T) { 583 job := mock.Job() 584 585 // Create 10 existing allocations 586 var allocs []*structs.Allocation 587 for i := 0; i < 10; i++ { 588 alloc := mock.Alloc() 589 alloc.Job = job 590 alloc.JobID = job.ID 591 alloc.NodeID = uuid.Generate() 592 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 593 allocs = append(allocs, alloc) 594 } 595 596 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 
597 r := reconciler.Compute() 598 599 // Assert the correct results 600 assertResults(t, r, &resultExpectation{ 601 createDeployment: nil, 602 deploymentUpdates: nil, 603 destructive: 10, 604 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 605 job.TaskGroups[0].Name: { 606 DestructiveUpdate: 10, 607 }, 608 }, 609 }) 610 611 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 612 } 613 614 // Tests the reconciler properly handles destructive upgrading allocations while 615 // scaling up 616 func TestReconciler_Destructive_ScaleUp(t *testing.T) { 617 // Set desired 15 618 job := mock.Job() 619 job.TaskGroups[0].Count = 15 620 621 // Create 10 existing allocations 622 var allocs []*structs.Allocation 623 for i := 0; i < 10; i++ { 624 alloc := mock.Alloc() 625 alloc.Job = job 626 alloc.JobID = job.ID 627 alloc.NodeID = uuid.Generate() 628 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 629 allocs = append(allocs, alloc) 630 } 631 632 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 633 r := reconciler.Compute() 634 635 // Assert the correct results 636 assertResults(t, r, &resultExpectation{ 637 createDeployment: nil, 638 deploymentUpdates: nil, 639 place: 5, 640 destructive: 10, 641 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 642 job.TaskGroups[0].Name: { 643 Place: 5, 644 DestructiveUpdate: 10, 645 }, 646 }, 647 }) 648 649 assertNamesHaveIndexes(t, intRange(0, 9), destructiveResultsToNames(r.destructiveUpdate)) 650 assertNamesHaveIndexes(t, intRange(10, 14), placeResultsToNames(r.place)) 651 } 652 653 // Tests the reconciler properly handles destructive upgrading allocations while 654 // scaling down 655 func TestReconciler_Destructive_ScaleDown(t *testing.T) { 656 // Set desired 5 657 job := mock.Job() 658 job.TaskGroups[0].Count = 5 659 660 // Create 10 existing allocations 661 var allocs []*structs.Allocation 662 
for i := 0; i < 10; i++ { 663 alloc := mock.Alloc() 664 alloc.Job = job 665 alloc.JobID = job.ID 666 alloc.NodeID = uuid.Generate() 667 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 668 allocs = append(allocs, alloc) 669 } 670 671 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 672 r := reconciler.Compute() 673 674 // Assert the correct results 675 assertResults(t, r, &resultExpectation{ 676 createDeployment: nil, 677 deploymentUpdates: nil, 678 destructive: 5, 679 stop: 5, 680 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 681 job.TaskGroups[0].Name: { 682 Stop: 5, 683 DestructiveUpdate: 5, 684 }, 685 }, 686 }) 687 688 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 689 assertNamesHaveIndexes(t, intRange(0, 4), destructiveResultsToNames(r.destructiveUpdate)) 690 } 691 692 // Tests the reconciler properly handles lost nodes with allocations 693 func TestReconciler_LostNode(t *testing.T) { 694 job := mock.Job() 695 696 // Create 10 existing allocations 697 var allocs []*structs.Allocation 698 for i := 0; i < 10; i++ { 699 alloc := mock.Alloc() 700 alloc.Job = job 701 alloc.JobID = job.ID 702 alloc.NodeID = uuid.Generate() 703 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 704 allocs = append(allocs, alloc) 705 } 706 707 // Build a map of tainted nodes 708 tainted := make(map[string]*structs.Node, 2) 709 for i := 0; i < 2; i++ { 710 n := mock.Node() 711 n.ID = allocs[i].NodeID 712 n.Status = structs.NodeStatusDown 713 tainted[n.ID] = n 714 } 715 716 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 717 r := reconciler.Compute() 718 719 // Assert the correct results 720 assertResults(t, r, &resultExpectation{ 721 createDeployment: nil, 722 deploymentUpdates: nil, 723 place: 2, 724 inplace: 0, 725 stop: 2, 726 desiredTGUpdates: 
map[string]*structs.DesiredUpdates{ 727 job.TaskGroups[0].Name: { 728 Place: 2, 729 Stop: 2, 730 Ignore: 8, 731 }, 732 }, 733 }) 734 735 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 736 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 737 } 738 739 // Tests the reconciler properly handles lost nodes with allocations while 740 // scaling up 741 func TestReconciler_LostNode_ScaleUp(t *testing.T) { 742 // Set desired 15 743 job := mock.Job() 744 job.TaskGroups[0].Count = 15 745 746 // Create 10 existing allocations 747 var allocs []*structs.Allocation 748 for i := 0; i < 10; i++ { 749 alloc := mock.Alloc() 750 alloc.Job = job 751 alloc.JobID = job.ID 752 alloc.NodeID = uuid.Generate() 753 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 754 allocs = append(allocs, alloc) 755 } 756 757 // Build a map of tainted nodes 758 tainted := make(map[string]*structs.Node, 2) 759 for i := 0; i < 2; i++ { 760 n := mock.Node() 761 n.ID = allocs[i].NodeID 762 n.Status = structs.NodeStatusDown 763 tainted[n.ID] = n 764 } 765 766 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 767 r := reconciler.Compute() 768 769 // Assert the correct results 770 assertResults(t, r, &resultExpectation{ 771 createDeployment: nil, 772 deploymentUpdates: nil, 773 place: 7, 774 inplace: 0, 775 stop: 2, 776 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 777 job.TaskGroups[0].Name: { 778 Place: 7, 779 Stop: 2, 780 Ignore: 8, 781 }, 782 }, 783 }) 784 785 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 786 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 787 } 788 789 // Tests the reconciler properly handles lost nodes with allocations while 790 // scaling down 791 func TestReconciler_LostNode_ScaleDown(t *testing.T) { 792 // Set desired 5 793 job := mock.Job() 794 job.TaskGroups[0].Count = 5 795 796 // 
Create 10 existing allocations 797 var allocs []*structs.Allocation 798 for i := 0; i < 10; i++ { 799 alloc := mock.Alloc() 800 alloc.Job = job 801 alloc.JobID = job.ID 802 alloc.NodeID = uuid.Generate() 803 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 804 allocs = append(allocs, alloc) 805 } 806 807 // Build a map of tainted nodes 808 tainted := make(map[string]*structs.Node, 2) 809 for i := 0; i < 2; i++ { 810 n := mock.Node() 811 n.ID = allocs[i].NodeID 812 n.Status = structs.NodeStatusDown 813 tainted[n.ID] = n 814 } 815 816 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 817 r := reconciler.Compute() 818 819 // Assert the correct results 820 assertResults(t, r, &resultExpectation{ 821 createDeployment: nil, 822 deploymentUpdates: nil, 823 place: 0, 824 inplace: 0, 825 stop: 5, 826 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 827 job.TaskGroups[0].Name: { 828 Stop: 5, 829 Ignore: 5, 830 }, 831 }, 832 }) 833 834 assertNamesHaveIndexes(t, intRange(0, 1, 7, 9), stopResultsToNames(r.stop)) 835 } 836 837 // Tests the reconciler properly handles draining nodes with allocations 838 func TestReconciler_DrainNode(t *testing.T) { 839 job := mock.Job() 840 841 // Create 10 existing allocations 842 var allocs []*structs.Allocation 843 for i := 0; i < 10; i++ { 844 alloc := mock.Alloc() 845 alloc.Job = job 846 alloc.JobID = job.ID 847 alloc.NodeID = uuid.Generate() 848 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 849 allocs = append(allocs, alloc) 850 } 851 852 // Build a map of tainted nodes 853 tainted := make(map[string]*structs.Node, 2) 854 for i := 0; i < 2; i++ { 855 n := mock.Node() 856 n.ID = allocs[i].NodeID 857 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 858 n.Drain = true 859 tainted[n.ID] = n 860 } 861 862 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, 
tainted, "") 863 r := reconciler.Compute() 864 865 // Assert the correct results 866 assertResults(t, r, &resultExpectation{ 867 createDeployment: nil, 868 deploymentUpdates: nil, 869 place: 2, 870 inplace: 0, 871 stop: 2, 872 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 873 job.TaskGroups[0].Name: { 874 Migrate: 2, 875 Ignore: 8, 876 }, 877 }, 878 }) 879 880 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 881 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 882 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 883 // These should not have the reschedule field set 884 assertPlacementsAreRescheduled(t, 0, r.place) 885 } 886 887 // Tests the reconciler properly handles draining nodes with allocations while 888 // scaling up 889 func TestReconciler_DrainNode_ScaleUp(t *testing.T) { 890 // Set desired 15 891 job := mock.Job() 892 job.TaskGroups[0].Count = 15 893 894 // Create 10 existing allocations 895 var allocs []*structs.Allocation 896 for i := 0; i < 10; i++ { 897 alloc := mock.Alloc() 898 alloc.Job = job 899 alloc.JobID = job.ID 900 alloc.NodeID = uuid.Generate() 901 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 902 allocs = append(allocs, alloc) 903 } 904 905 // Build a map of tainted nodes 906 tainted := make(map[string]*structs.Node, 2) 907 for i := 0; i < 2; i++ { 908 n := mock.Node() 909 n.ID = allocs[i].NodeID 910 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 911 n.Drain = true 912 tainted[n.ID] = n 913 } 914 915 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 916 r := reconciler.Compute() 917 918 // Assert the correct results 919 assertResults(t, r, &resultExpectation{ 920 createDeployment: nil, 921 deploymentUpdates: nil, 922 place: 7, 923 inplace: 0, 924 stop: 2, 925 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 926 job.TaskGroups[0].Name: { 927 Place: 5, 928 Migrate: 2, 929 
Ignore: 8, 930 }, 931 }, 932 }) 933 934 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 935 assertNamesHaveIndexes(t, intRange(0, 1, 10, 14), placeResultsToNames(r.place)) 936 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 937 // These should not have the reschedule field set 938 assertPlacementsAreRescheduled(t, 0, r.place) 939 } 940 941 // Tests the reconciler properly handles draining nodes with allocations while 942 // scaling down 943 func TestReconciler_DrainNode_ScaleDown(t *testing.T) { 944 // Set desired 8 945 job := mock.Job() 946 job.TaskGroups[0].Count = 8 947 948 // Create 10 existing allocations 949 var allocs []*structs.Allocation 950 for i := 0; i < 10; i++ { 951 alloc := mock.Alloc() 952 alloc.Job = job 953 alloc.JobID = job.ID 954 alloc.NodeID = uuid.Generate() 955 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 956 allocs = append(allocs, alloc) 957 } 958 959 // Build a map of tainted nodes 960 tainted := make(map[string]*structs.Node, 3) 961 for i := 0; i < 3; i++ { 962 n := mock.Node() 963 n.ID = allocs[i].NodeID 964 allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) 965 n.Drain = true 966 tainted[n.ID] = n 967 } 968 969 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted, "") 970 r := reconciler.Compute() 971 972 // Assert the correct results 973 assertResults(t, r, &resultExpectation{ 974 createDeployment: nil, 975 deploymentUpdates: nil, 976 place: 1, 977 inplace: 0, 978 stop: 3, 979 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 980 job.TaskGroups[0].Name: { 981 Migrate: 1, 982 Stop: 2, 983 Ignore: 7, 984 }, 985 }, 986 }) 987 988 assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) 989 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 990 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 991 // These should not have the reschedule field set 992 
assertPlacementsAreRescheduled(t, 0, r.place) 993 } 994 995 // Tests the reconciler properly handles a task group being removed 996 func TestReconciler_RemovedTG(t *testing.T) { 997 job := mock.Job() 998 999 // Create 10 allocations for a tg that no longer exists 1000 var allocs []*structs.Allocation 1001 for i := 0; i < 10; i++ { 1002 alloc := mock.Alloc() 1003 alloc.Job = job 1004 alloc.JobID = job.ID 1005 alloc.NodeID = uuid.Generate() 1006 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1007 allocs = append(allocs, alloc) 1008 } 1009 1010 oldName := job.TaskGroups[0].Name 1011 newName := "different" 1012 job.TaskGroups[0].Name = newName 1013 1014 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1015 r := reconciler.Compute() 1016 1017 // Assert the correct results 1018 assertResults(t, r, &resultExpectation{ 1019 createDeployment: nil, 1020 deploymentUpdates: nil, 1021 place: 10, 1022 inplace: 0, 1023 stop: 10, 1024 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1025 oldName: { 1026 Stop: 10, 1027 }, 1028 newName: { 1029 Place: 10, 1030 }, 1031 }, 1032 }) 1033 1034 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1035 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 1036 } 1037 1038 // Tests the reconciler properly handles a job in stopped states 1039 func TestReconciler_JobStopped(t *testing.T) { 1040 job := mock.Job() 1041 job.Stop = true 1042 1043 cases := []struct { 1044 name string 1045 job *structs.Job 1046 jobID, taskGroup string 1047 }{ 1048 { 1049 name: "stopped job", 1050 job: job, 1051 jobID: job.ID, 1052 taskGroup: job.TaskGroups[0].Name, 1053 }, 1054 { 1055 name: "nil job", 1056 job: nil, 1057 jobID: "foo", 1058 taskGroup: "bar", 1059 }, 1060 } 1061 1062 for _, c := range cases { 1063 t.Run(c.name, func(t *testing.T) { 1064 // Create 10 allocations 1065 var allocs []*structs.Allocation 1066 for i := 0; i < 
10; i++ { 1067 alloc := mock.Alloc() 1068 alloc.Job = c.job 1069 alloc.JobID = c.jobID 1070 alloc.NodeID = uuid.Generate() 1071 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 1072 alloc.TaskGroup = c.taskGroup 1073 allocs = append(allocs, alloc) 1074 } 1075 1076 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "") 1077 r := reconciler.Compute() 1078 1079 // Assert the correct results 1080 assertResults(t, r, &resultExpectation{ 1081 createDeployment: nil, 1082 deploymentUpdates: nil, 1083 place: 0, 1084 inplace: 0, 1085 stop: 10, 1086 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1087 c.taskGroup: { 1088 Stop: 10, 1089 }, 1090 }, 1091 }) 1092 1093 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 1094 }) 1095 } 1096 } 1097 1098 // Tests the reconciler doesn't update allocs in terminal state 1099 // when job is stopped or nil 1100 func TestReconciler_JobStopped_TerminalAllocs(t *testing.T) { 1101 job := mock.Job() 1102 job.Stop = true 1103 1104 cases := []struct { 1105 name string 1106 job *structs.Job 1107 jobID, taskGroup string 1108 }{ 1109 { 1110 name: "stopped job", 1111 job: job, 1112 jobID: job.ID, 1113 taskGroup: job.TaskGroups[0].Name, 1114 }, 1115 { 1116 name: "nil job", 1117 job: nil, 1118 jobID: "foo", 1119 taskGroup: "bar", 1120 }, 1121 } 1122 1123 for _, c := range cases { 1124 t.Run(c.name, func(t *testing.T) { 1125 // Create 10 terminal allocations 1126 var allocs []*structs.Allocation 1127 for i := 0; i < 10; i++ { 1128 alloc := mock.Alloc() 1129 alloc.Job = c.job 1130 alloc.JobID = c.jobID 1131 alloc.NodeID = uuid.Generate() 1132 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 1133 alloc.TaskGroup = c.taskGroup 1134 if i%2 == 0 { 1135 alloc.DesiredStatus = structs.AllocDesiredStatusStop 1136 } else { 1137 alloc.ClientStatus = structs.AllocClientStatusFailed 1138 } 1139 allocs = append(allocs, alloc) 1140 } 1141 1142 reconciler 
:= NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, nil, allocs, nil, "") 1143 r := reconciler.Compute() 1144 require.Len(t, r.stop, 0) 1145 // Assert the correct results 1146 assertResults(t, r, &resultExpectation{ 1147 createDeployment: nil, 1148 deploymentUpdates: nil, 1149 place: 0, 1150 inplace: 0, 1151 stop: 0, 1152 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1153 c.taskGroup: {}, 1154 }, 1155 }) 1156 }) 1157 } 1158 } 1159 1160 // Tests the reconciler properly handles jobs with multiple task groups 1161 func TestReconciler_MultiTG(t *testing.T) { 1162 job := mock.Job() 1163 tg2 := job.TaskGroups[0].Copy() 1164 tg2.Name = "foo" 1165 job.TaskGroups = append(job.TaskGroups, tg2) 1166 1167 // Create 2 existing allocations for the first tg 1168 var allocs []*structs.Allocation 1169 for i := 0; i < 2; i++ { 1170 alloc := mock.Alloc() 1171 alloc.Job = job 1172 alloc.JobID = job.ID 1173 alloc.NodeID = uuid.Generate() 1174 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1175 allocs = append(allocs, alloc) 1176 } 1177 1178 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1179 r := reconciler.Compute() 1180 1181 // Assert the correct results 1182 assertResults(t, r, &resultExpectation{ 1183 createDeployment: nil, 1184 deploymentUpdates: nil, 1185 place: 18, 1186 inplace: 0, 1187 stop: 0, 1188 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1189 job.TaskGroups[0].Name: { 1190 Place: 8, 1191 Ignore: 2, 1192 }, 1193 tg2.Name: { 1194 Place: 10, 1195 }, 1196 }, 1197 }) 1198 1199 assertNamesHaveIndexes(t, intRange(2, 9, 0, 9), placeResultsToNames(r.place)) 1200 } 1201 1202 // Tests the reconciler properly handles jobs with multiple task groups with 1203 // only one having an update stanza and a deployment already being created 1204 func TestReconciler_MultiTG_SingleUpdateStanza(t *testing.T) { 1205 job := mock.Job() 1206 tg2 := 
job.TaskGroups[0].Copy() 1207 tg2.Name = "foo" 1208 job.TaskGroups = append(job.TaskGroups, tg2) 1209 job.TaskGroups[0].Update = noCanaryUpdate 1210 1211 // Create all the allocs 1212 var allocs []*structs.Allocation 1213 for i := 0; i < 2; i++ { 1214 for j := 0; j < 10; j++ { 1215 alloc := mock.Alloc() 1216 alloc.Job = job 1217 alloc.JobID = job.ID 1218 alloc.NodeID = uuid.Generate() 1219 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[i].Name, uint(j)) 1220 alloc.TaskGroup = job.TaskGroups[i].Name 1221 allocs = append(allocs, alloc) 1222 } 1223 } 1224 1225 d := structs.NewDeployment(job) 1226 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 1227 DesiredTotal: 10, 1228 } 1229 1230 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 1231 r := reconciler.Compute() 1232 1233 // Assert the correct results 1234 assertResults(t, r, &resultExpectation{ 1235 createDeployment: nil, 1236 deploymentUpdates: nil, 1237 place: 0, 1238 inplace: 0, 1239 stop: 0, 1240 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1241 job.TaskGroups[0].Name: { 1242 Ignore: 10, 1243 }, 1244 tg2.Name: { 1245 Ignore: 10, 1246 }, 1247 }, 1248 }) 1249 } 1250 1251 // Tests delayed rescheduling of failed batch allocations 1252 func TestReconciler_RescheduleLater_Batch(t *testing.T) { 1253 require := require.New(t) 1254 1255 // Set desired 4 1256 job := mock.Job() 1257 job.TaskGroups[0].Count = 4 1258 now := time.Now() 1259 1260 // Set up reschedule policy 1261 delayDur := 15 * time.Second 1262 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"} 1263 tgName := job.TaskGroups[0].Name 1264 1265 // Create 6 existing allocations - 2 running, 1 complete and 3 failed 1266 var allocs []*structs.Allocation 1267 for i := 0; i < 6; i++ { 1268 alloc := mock.Alloc() 1269 alloc.Job = job 1270 alloc.JobID = job.ID 1271 alloc.NodeID = 
uuid.Generate() 1272 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1273 allocs = append(allocs, alloc) 1274 alloc.ClientStatus = structs.AllocClientStatusRunning 1275 } 1276 1277 // Mark 3 as failed with restart tracking info 1278 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1279 allocs[0].NextAllocation = allocs[1].ID 1280 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1281 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1282 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1283 PrevAllocID: allocs[0].ID, 1284 PrevNodeID: uuid.Generate(), 1285 }, 1286 }} 1287 allocs[1].NextAllocation = allocs[2].ID 1288 allocs[2].ClientStatus = structs.AllocClientStatusFailed 1289 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1290 StartedAt: now.Add(-1 * time.Hour), 1291 FinishedAt: now}} 1292 allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1293 {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), 1294 PrevAllocID: allocs[0].ID, 1295 PrevNodeID: uuid.Generate(), 1296 }, 1297 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1298 PrevAllocID: allocs[1].ID, 1299 PrevNodeID: uuid.Generate(), 1300 }, 1301 }} 1302 1303 // Mark one as complete 1304 allocs[5].ClientStatus = structs.AllocClientStatusComplete 1305 1306 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate()) 1307 r := reconciler.Compute() 1308 1309 // Two reschedule attempts were already made, one more can be made at a future time 1310 // Verify that the follow up eval has the expected waitUntil time 1311 evals := r.desiredFollowupEvals[tgName] 1312 require.NotNil(evals) 1313 require.Equal(1, len(evals)) 1314 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1315 1316 // Alloc 5 should not be replaced because it is terminal 1317 
assertResults(t, r, &resultExpectation{ 1318 createDeployment: nil, 1319 deploymentUpdates: nil, 1320 place: 0, 1321 inplace: 0, 1322 attributeUpdates: 1, 1323 stop: 0, 1324 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1325 job.TaskGroups[0].Name: { 1326 Place: 0, 1327 InPlaceUpdate: 0, 1328 Ignore: 4, 1329 }, 1330 }, 1331 }) 1332 assertNamesHaveIndexes(t, intRange(2, 2), attributeUpdatesToNames(r.attributeUpdates)) 1333 1334 // Verify that the followup evalID field is set correctly 1335 var annotated *structs.Allocation 1336 for _, a := range r.attributeUpdates { 1337 annotated = a 1338 } 1339 require.Equal(evals[0].ID, annotated.FollowupEvalID) 1340 } 1341 1342 // Tests delayed rescheduling of failed batch allocations and batching of allocs 1343 // with fail times that are close together 1344 func TestReconciler_RescheduleLaterWithBatchedEvals_Batch(t *testing.T) { 1345 require := require.New(t) 1346 1347 // Set desired 4 1348 job := mock.Job() 1349 job.TaskGroups[0].Count = 10 1350 now := time.Now() 1351 1352 // Set up reschedule policy 1353 delayDur := 15 * time.Second 1354 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: delayDur, DelayFunction: "constant"} 1355 tgName := job.TaskGroups[0].Name 1356 1357 // Create 10 existing allocations 1358 var allocs []*structs.Allocation 1359 for i := 0; i < 10; i++ { 1360 alloc := mock.Alloc() 1361 alloc.Job = job 1362 alloc.JobID = job.ID 1363 alloc.NodeID = uuid.Generate() 1364 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1365 allocs = append(allocs, alloc) 1366 alloc.ClientStatus = structs.AllocClientStatusRunning 1367 } 1368 1369 // Mark 5 as failed with fail times very close together 1370 for i := 0; i < 5; i++ { 1371 allocs[i].ClientStatus = structs.AllocClientStatusFailed 1372 allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1373 StartedAt: now.Add(-1 * time.Hour), 1374 FinishedAt: 
now.Add(time.Duration(50*i) * time.Millisecond)}} 1375 } 1376 1377 // Mark two more as failed several seconds later 1378 for i := 5; i < 7; i++ { 1379 allocs[i].ClientStatus = structs.AllocClientStatusFailed 1380 allocs[i].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1381 StartedAt: now.Add(-1 * time.Hour), 1382 FinishedAt: now.Add(10 * time.Second)}} 1383 } 1384 1385 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, uuid.Generate()) 1386 r := reconciler.Compute() 1387 1388 // Verify that two follow up evals were created 1389 evals := r.desiredFollowupEvals[tgName] 1390 require.NotNil(evals) 1391 require.Equal(2, len(evals)) 1392 1393 // Verify expected WaitUntil values for both batched evals 1394 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1395 secondBatchDuration := delayDur + 10*time.Second 1396 require.Equal(now.Add(secondBatchDuration), evals[1].WaitUntil) 1397 1398 // Alloc 5 should not be replaced because it is terminal 1399 assertResults(t, r, &resultExpectation{ 1400 createDeployment: nil, 1401 deploymentUpdates: nil, 1402 place: 0, 1403 inplace: 0, 1404 attributeUpdates: 7, 1405 stop: 0, 1406 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1407 job.TaskGroups[0].Name: { 1408 Place: 0, 1409 InPlaceUpdate: 0, 1410 Ignore: 10, 1411 }, 1412 }, 1413 }) 1414 assertNamesHaveIndexes(t, intRange(0, 6), attributeUpdatesToNames(r.attributeUpdates)) 1415 1416 // Verify that the followup evalID field is set correctly 1417 for _, alloc := range r.attributeUpdates { 1418 if allocNameToIndex(alloc.Name) < 5 { 1419 require.Equal(evals[0].ID, alloc.FollowupEvalID) 1420 } else if allocNameToIndex(alloc.Name) < 7 { 1421 require.Equal(evals[1].ID, alloc.FollowupEvalID) 1422 } else { 1423 t.Fatalf("Unexpected alloc name in Inplace results %v", alloc.Name) 1424 } 1425 } 1426 } 1427 1428 // Tests rescheduling failed batch allocations 1429 func TestReconciler_RescheduleNow_Batch(t 
*testing.T) { 1430 require := require.New(t) 1431 // Set desired 4 1432 job := mock.Job() 1433 job.TaskGroups[0].Count = 4 1434 now := time.Now() 1435 // Set up reschedule policy 1436 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 3, Interval: 24 * time.Hour, Delay: 5 * time.Second, DelayFunction: "constant"} 1437 tgName := job.TaskGroups[0].Name 1438 // Create 6 existing allocations - 2 running, 1 complete and 3 failed 1439 var allocs []*structs.Allocation 1440 for i := 0; i < 6; i++ { 1441 alloc := mock.Alloc() 1442 alloc.Job = job 1443 alloc.JobID = job.ID 1444 alloc.NodeID = uuid.Generate() 1445 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1446 allocs = append(allocs, alloc) 1447 alloc.ClientStatus = structs.AllocClientStatusRunning 1448 } 1449 // Mark 3 as failed with restart tracking info 1450 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1451 allocs[0].NextAllocation = allocs[1].ID 1452 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1453 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1454 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1455 PrevAllocID: allocs[0].ID, 1456 PrevNodeID: uuid.Generate(), 1457 }, 1458 }} 1459 allocs[1].NextAllocation = allocs[2].ID 1460 allocs[2].ClientStatus = structs.AllocClientStatusFailed 1461 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1462 StartedAt: now.Add(-1 * time.Hour), 1463 FinishedAt: now.Add(-5 * time.Second)}} 1464 allocs[2].FollowupEvalID = uuid.Generate() 1465 allocs[2].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1466 {RescheduleTime: time.Now().Add(-2 * time.Hour).UTC().UnixNano(), 1467 PrevAllocID: allocs[0].ID, 1468 PrevNodeID: uuid.Generate(), 1469 }, 1470 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1471 PrevAllocID: allocs[1].ID, 1472 PrevNodeID: uuid.Generate(), 1473 }, 1474 }} 
1475 // Mark one as complete 1476 allocs[5].ClientStatus = structs.AllocClientStatusComplete 1477 1478 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job.ID, job, nil, allocs, nil, "") 1479 reconciler.now = now 1480 r := reconciler.Compute() 1481 1482 // Verify that no follow up evals were created 1483 evals := r.desiredFollowupEvals[tgName] 1484 require.Nil(evals) 1485 1486 // Two reschedule attempts were made, one more can be made now 1487 // Alloc 5 should not be replaced because it is terminal 1488 assertResults(t, r, &resultExpectation{ 1489 createDeployment: nil, 1490 deploymentUpdates: nil, 1491 place: 1, 1492 inplace: 0, 1493 stop: 0, 1494 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1495 job.TaskGroups[0].Name: { 1496 Place: 1, 1497 Ignore: 3, 1498 }, 1499 }, 1500 }) 1501 1502 assertNamesHaveIndexes(t, intRange(2, 2), placeResultsToNames(r.place)) 1503 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1504 assertPlacementsAreRescheduled(t, 1, r.place) 1505 1506 } 1507 1508 // Tests rescheduling failed service allocations with desired state stop 1509 func TestReconciler_RescheduleLater_Service(t *testing.T) { 1510 require := require.New(t) 1511 1512 // Set desired 5 1513 job := mock.Job() 1514 job.TaskGroups[0].Count = 5 1515 tgName := job.TaskGroups[0].Name 1516 now := time.Now() 1517 1518 // Set up reschedule policy 1519 delayDur := 15 * time.Second 1520 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: 24 * time.Hour, Delay: delayDur, MaxDelay: 1 * time.Hour} 1521 1522 // Create 5 existing allocations 1523 var allocs []*structs.Allocation 1524 for i := 0; i < 5; i++ { 1525 alloc := mock.Alloc() 1526 alloc.Job = job 1527 alloc.JobID = job.ID 1528 alloc.NodeID = uuid.Generate() 1529 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1530 allocs = append(allocs, alloc) 1531 alloc.ClientStatus = structs.AllocClientStatusRunning 1532 } 1533 1534 // Mark two 
as failed 1535 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1536 1537 // Mark one of them as already rescheduled once 1538 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1539 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1540 PrevAllocID: uuid.Generate(), 1541 PrevNodeID: uuid.Generate(), 1542 }, 1543 }} 1544 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1545 StartedAt: now.Add(-1 * time.Hour), 1546 FinishedAt: now}} 1547 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1548 1549 // Mark one as desired state stop 1550 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1551 1552 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, uuid.Generate()) 1553 r := reconciler.Compute() 1554 1555 // Should place a new placement and create a follow up eval for the delayed reschedule 1556 // Verify that the follow up eval has the expected waitUntil time 1557 evals := r.desiredFollowupEvals[tgName] 1558 require.NotNil(evals) 1559 require.Equal(1, len(evals)) 1560 require.Equal(now.Add(delayDur), evals[0].WaitUntil) 1561 1562 assertResults(t, r, &resultExpectation{ 1563 createDeployment: nil, 1564 deploymentUpdates: nil, 1565 place: 1, 1566 inplace: 0, 1567 attributeUpdates: 1, 1568 stop: 0, 1569 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1570 job.TaskGroups[0].Name: { 1571 Place: 1, 1572 InPlaceUpdate: 0, 1573 Ignore: 4, 1574 }, 1575 }, 1576 }) 1577 1578 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1579 assertNamesHaveIndexes(t, intRange(1, 1), attributeUpdatesToNames(r.attributeUpdates)) 1580 1581 // Verify that the followup evalID field is set correctly 1582 var annotated *structs.Allocation 1583 for _, a := range r.attributeUpdates { 1584 annotated = a 1585 } 1586 require.Equal(evals[0].ID, annotated.FollowupEvalID) 1587 } 1588 1589 // Tests service 
allocations with client status complete 1590 func TestReconciler_Service_ClientStatusComplete(t *testing.T) { 1591 // Set desired 5 1592 job := mock.Job() 1593 job.TaskGroups[0].Count = 5 1594 1595 // Set up reschedule policy 1596 delayDur := 15 * time.Second 1597 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1598 Attempts: 1, 1599 Interval: 24 * time.Hour, 1600 Delay: delayDur, 1601 MaxDelay: 1 * time.Hour, 1602 } 1603 1604 // Create 5 existing allocations 1605 var allocs []*structs.Allocation 1606 for i := 0; i < 5; i++ { 1607 alloc := mock.Alloc() 1608 alloc.Job = job 1609 alloc.JobID = job.ID 1610 alloc.NodeID = uuid.Generate() 1611 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1612 allocs = append(allocs, alloc) 1613 alloc.ClientStatus = structs.AllocClientStatusRunning 1614 alloc.DesiredStatus = structs.AllocDesiredStatusRun 1615 } 1616 1617 // Mark one as client status complete 1618 allocs[4].ClientStatus = structs.AllocClientStatusComplete 1619 1620 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1621 r := reconciler.Compute() 1622 1623 // Should place a new placement for the alloc that was marked complete 1624 assertResults(t, r, &resultExpectation{ 1625 createDeployment: nil, 1626 deploymentUpdates: nil, 1627 place: 1, 1628 inplace: 0, 1629 stop: 0, 1630 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1631 job.TaskGroups[0].Name: { 1632 Place: 1, 1633 InPlaceUpdate: 0, 1634 Ignore: 4, 1635 }, 1636 }, 1637 }) 1638 1639 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1640 1641 } 1642 1643 // Tests service job placement with desired stop and client status complete 1644 func TestReconciler_Service_DesiredStop_ClientStatusComplete(t *testing.T) { 1645 // Set desired 5 1646 job := mock.Job() 1647 job.TaskGroups[0].Count = 5 1648 1649 // Set up reschedule policy 1650 delayDur := 15 * time.Second 1651 
job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1652 Attempts: 1, 1653 Interval: 24 * time.Hour, 1654 Delay: delayDur, 1655 MaxDelay: 1 * time.Hour, 1656 } 1657 1658 // Create 5 existing allocations 1659 var allocs []*structs.Allocation 1660 for i := 0; i < 5; i++ { 1661 alloc := mock.Alloc() 1662 alloc.Job = job 1663 alloc.JobID = job.ID 1664 alloc.NodeID = uuid.Generate() 1665 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1666 allocs = append(allocs, alloc) 1667 alloc.ClientStatus = structs.AllocClientStatusRunning 1668 alloc.DesiredStatus = structs.AllocDesiredStatusRun 1669 } 1670 1671 // Mark one as failed but with desired status stop 1672 // Should not trigger rescheduling logic but should trigger a placement 1673 allocs[4].ClientStatus = structs.AllocClientStatusFailed 1674 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1675 1676 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1677 r := reconciler.Compute() 1678 1679 // Should place a new placement for the alloc that was marked stopped 1680 assertResults(t, r, &resultExpectation{ 1681 createDeployment: nil, 1682 deploymentUpdates: nil, 1683 place: 1, 1684 inplace: 0, 1685 stop: 0, 1686 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1687 job.TaskGroups[0].Name: { 1688 Place: 1, 1689 InPlaceUpdate: 0, 1690 Ignore: 4, 1691 }, 1692 }, 1693 }) 1694 1695 assertNamesHaveIndexes(t, intRange(4, 4), placeResultsToNames(r.place)) 1696 1697 // Should not have any follow up evals created 1698 require := require.New(t) 1699 require.Equal(0, len(r.desiredFollowupEvals)) 1700 } 1701 1702 // Tests rescheduling failed service allocations with desired state stop 1703 func TestReconciler_RescheduleNow_Service(t *testing.T) { 1704 require := require.New(t) 1705 1706 // Set desired 5 1707 job := mock.Job() 1708 job.TaskGroups[0].Count = 5 1709 tgName := job.TaskGroups[0].Name 1710 now := time.Now() 1711 
1712 // Set up reschedule policy and update stanza 1713 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1714 Attempts: 1, 1715 Interval: 24 * time.Hour, 1716 Delay: 5 * time.Second, 1717 DelayFunction: "", 1718 MaxDelay: 1 * time.Hour, 1719 Unlimited: false, 1720 } 1721 job.TaskGroups[0].Update = noCanaryUpdate 1722 1723 // Create 5 existing allocations 1724 var allocs []*structs.Allocation 1725 for i := 0; i < 5; i++ { 1726 alloc := mock.Alloc() 1727 alloc.Job = job 1728 alloc.JobID = job.ID 1729 alloc.NodeID = uuid.Generate() 1730 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1731 allocs = append(allocs, alloc) 1732 alloc.ClientStatus = structs.AllocClientStatusRunning 1733 } 1734 1735 // Mark two as failed 1736 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1737 1738 // Mark one of them as already rescheduled once 1739 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1740 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1741 PrevAllocID: uuid.Generate(), 1742 PrevNodeID: uuid.Generate(), 1743 }, 1744 }} 1745 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1746 StartedAt: now.Add(-1 * time.Hour), 1747 FinishedAt: now.Add(-10 * time.Second)}} 1748 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1749 1750 // Mark one as desired state stop 1751 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 1752 1753 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1754 r := reconciler.Compute() 1755 1756 // Verify that no follow up evals were created 1757 evals := r.desiredFollowupEvals[tgName] 1758 require.Nil(evals) 1759 1760 // Verify that one rescheduled alloc and one replacement for terminal alloc were placed 1761 assertResults(t, r, &resultExpectation{ 1762 createDeployment: nil, 1763 deploymentUpdates: nil, 1764 place: 2, 1765 inplace: 0, 1766 stop: 
0, 1767 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1768 job.TaskGroups[0].Name: { 1769 Place: 2, 1770 Ignore: 3, 1771 }, 1772 }, 1773 }) 1774 1775 // Rescheduled allocs should have previous allocs 1776 assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) 1777 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1778 assertPlacementsAreRescheduled(t, 1, r.place) 1779 } 1780 1781 // Tests rescheduling failed service allocations when there's clock drift (upto a second) 1782 func TestReconciler_RescheduleNow_WithinAllowedTimeWindow(t *testing.T) { 1783 require := require.New(t) 1784 1785 // Set desired 5 1786 job := mock.Job() 1787 job.TaskGroups[0].Count = 5 1788 tgName := job.TaskGroups[0].Name 1789 now := time.Now() 1790 1791 // Set up reschedule policy and update stanza 1792 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1793 Attempts: 1, 1794 Interval: 24 * time.Hour, 1795 Delay: 5 * time.Second, 1796 DelayFunction: "", 1797 MaxDelay: 1 * time.Hour, 1798 Unlimited: false, 1799 } 1800 job.TaskGroups[0].Update = noCanaryUpdate 1801 1802 // Create 5 existing allocations 1803 var allocs []*structs.Allocation 1804 for i := 0; i < 5; i++ { 1805 alloc := mock.Alloc() 1806 alloc.Job = job 1807 alloc.JobID = job.ID 1808 alloc.NodeID = uuid.Generate() 1809 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1810 allocs = append(allocs, alloc) 1811 alloc.ClientStatus = structs.AllocClientStatusRunning 1812 } 1813 1814 // Mark one as failed 1815 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1816 1817 // Mark one of them as already rescheduled once 1818 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1819 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1820 PrevAllocID: uuid.Generate(), 1821 PrevNodeID: uuid.Generate(), 1822 }, 1823 }} 1824 // Set fail time to 4 seconds ago which falls within the reschedule window 1825 
allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1826 StartedAt: now.Add(-1 * time.Hour), 1827 FinishedAt: now.Add(-4 * time.Second)}} 1828 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1829 1830 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 1831 reconciler.now = now 1832 r := reconciler.Compute() 1833 1834 // Verify that no follow up evals were created 1835 evals := r.desiredFollowupEvals[tgName] 1836 require.Nil(evals) 1837 1838 // Verify that one rescheduled alloc was placed 1839 assertResults(t, r, &resultExpectation{ 1840 createDeployment: nil, 1841 deploymentUpdates: nil, 1842 place: 1, 1843 inplace: 0, 1844 stop: 0, 1845 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1846 job.TaskGroups[0].Name: { 1847 Place: 1, 1848 Ignore: 4, 1849 }, 1850 }, 1851 }) 1852 1853 // Rescheduled allocs should have previous allocs 1854 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 1855 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1856 assertPlacementsAreRescheduled(t, 1, r.place) 1857 } 1858 1859 // Tests rescheduling failed service allocations when the eval ID matches and there's a large clock drift 1860 func TestReconciler_RescheduleNow_EvalIDMatch(t *testing.T) { 1861 require := require.New(t) 1862 1863 // Set desired 5 1864 job := mock.Job() 1865 job.TaskGroups[0].Count = 5 1866 tgName := job.TaskGroups[0].Name 1867 now := time.Now() 1868 1869 // Set up reschedule policy and update stanza 1870 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1871 Attempts: 1, 1872 Interval: 24 * time.Hour, 1873 Delay: 5 * time.Second, 1874 DelayFunction: "", 1875 MaxDelay: 1 * time.Hour, 1876 Unlimited: false, 1877 } 1878 job.TaskGroups[0].Update = noCanaryUpdate 1879 1880 // Create 5 existing allocations 1881 var allocs []*structs.Allocation 1882 for i := 0; i < 5; i++ { 1883 alloc := mock.Alloc() 1884 alloc.Job = job 1885 
alloc.JobID = job.ID 1886 alloc.NodeID = uuid.Generate() 1887 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1888 allocs = append(allocs, alloc) 1889 alloc.ClientStatus = structs.AllocClientStatusRunning 1890 } 1891 1892 // Mark one as failed 1893 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1894 1895 // Mark one of them as already rescheduled once 1896 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1897 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1898 PrevAllocID: uuid.Generate(), 1899 PrevNodeID: uuid.Generate(), 1900 }, 1901 }} 1902 // Set fail time to 5 seconds ago and eval ID 1903 evalID := uuid.Generate() 1904 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1905 StartedAt: now.Add(-1 * time.Hour), 1906 FinishedAt: now.Add(-5 * time.Second)}} 1907 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1908 allocs[1].FollowupEvalID = evalID 1909 1910 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, evalID) 1911 reconciler.now = now.Add(-30 * time.Second) 1912 r := reconciler.Compute() 1913 1914 // Verify that no follow up evals were created 1915 evals := r.desiredFollowupEvals[tgName] 1916 require.Nil(evals) 1917 1918 // Verify that one rescheduled alloc was placed 1919 assertResults(t, r, &resultExpectation{ 1920 createDeployment: nil, 1921 deploymentUpdates: nil, 1922 place: 1, 1923 inplace: 0, 1924 stop: 0, 1925 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 1926 job.TaskGroups[0].Name: { 1927 Place: 1, 1928 Ignore: 4, 1929 }, 1930 }, 1931 }) 1932 1933 // Rescheduled allocs should have previous allocs 1934 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 1935 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 1936 assertPlacementsAreRescheduled(t, 1, r.place) 1937 } 1938 1939 // Tests rescheduling failed service allocations when 
there are canaries 1940 func TestReconciler_RescheduleNow_Service_WithCanaries(t *testing.T) { 1941 require := require.New(t) 1942 1943 // Set desired 5 1944 job := mock.Job() 1945 job.TaskGroups[0].Count = 5 1946 tgName := job.TaskGroups[0].Name 1947 now := time.Now() 1948 1949 // Set up reschedule policy and update stanza 1950 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 1951 Attempts: 1, 1952 Interval: 24 * time.Hour, 1953 Delay: 5 * time.Second, 1954 DelayFunction: "", 1955 MaxDelay: 1 * time.Hour, 1956 Unlimited: false, 1957 } 1958 job.TaskGroups[0].Update = canaryUpdate 1959 1960 job2 := job.Copy() 1961 job2.Version++ 1962 1963 d := structs.NewDeployment(job2) 1964 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 1965 s := &structs.DeploymentState{ 1966 DesiredCanaries: 2, 1967 DesiredTotal: 5, 1968 } 1969 d.TaskGroups[job.TaskGroups[0].Name] = s 1970 1971 // Create 5 existing allocations 1972 var allocs []*structs.Allocation 1973 for i := 0; i < 5; i++ { 1974 alloc := mock.Alloc() 1975 alloc.Job = job 1976 alloc.JobID = job.ID 1977 alloc.NodeID = uuid.Generate() 1978 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 1979 allocs = append(allocs, alloc) 1980 alloc.ClientStatus = structs.AllocClientStatusRunning 1981 } 1982 1983 // Mark three as failed 1984 allocs[0].ClientStatus = structs.AllocClientStatusFailed 1985 1986 // Mark one of them as already rescheduled once 1987 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 1988 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 1989 PrevAllocID: uuid.Generate(), 1990 PrevNodeID: uuid.Generate(), 1991 }, 1992 }} 1993 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 1994 StartedAt: now.Add(-1 * time.Hour), 1995 FinishedAt: now.Add(-10 * time.Second)}} 1996 allocs[1].ClientStatus = structs.AllocClientStatusFailed 1997 1998 // Mark one as desired state stop 
1999 allocs[4].ClientStatus = structs.AllocClientStatusFailed 2000 2001 // Create 2 canary allocations 2002 for i := 0; i < 2; i++ { 2003 alloc := mock.Alloc() 2004 alloc.Job = job 2005 alloc.JobID = job.ID 2006 alloc.NodeID = uuid.Generate() 2007 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2008 alloc.ClientStatus = structs.AllocClientStatusRunning 2009 alloc.DeploymentID = d.ID 2010 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2011 Canary: true, 2012 Healthy: helper.BoolToPtr(false), 2013 } 2014 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2015 allocs = append(allocs, alloc) 2016 } 2017 2018 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "") 2019 r := reconciler.Compute() 2020 2021 // Verify that no follow up evals were created 2022 evals := r.desiredFollowupEvals[tgName] 2023 require.Nil(evals) 2024 2025 // Verify that one rescheduled alloc and one replacement for terminal alloc were placed 2026 assertResults(t, r, &resultExpectation{ 2027 createDeployment: nil, 2028 deploymentUpdates: nil, 2029 place: 2, 2030 inplace: 0, 2031 stop: 0, 2032 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2033 job.TaskGroups[0].Name: { 2034 Place: 2, 2035 Ignore: 5, 2036 }, 2037 }, 2038 }) 2039 2040 // Rescheduled allocs should have previous allocs 2041 assertNamesHaveIndexes(t, intRange(1, 1, 4, 4), placeResultsToNames(r.place)) 2042 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 2043 assertPlacementsAreRescheduled(t, 2, r.place) 2044 } 2045 2046 // Tests rescheduling failed canary service allocations 2047 func TestReconciler_RescheduleNow_Service_Canaries(t *testing.T) { 2048 require := require.New(t) 2049 2050 // Set desired 5 2051 job := mock.Job() 2052 job.TaskGroups[0].Count = 5 2053 tgName := job.TaskGroups[0].Name 2054 now := time.Now() 2055 2056 // Set up reschedule policy and update stanza 2057 job.TaskGroups[0].ReschedulePolicy = 
&structs.ReschedulePolicy{ 2058 Delay: 5 * time.Second, 2059 DelayFunction: "constant", 2060 MaxDelay: 1 * time.Hour, 2061 Unlimited: true, 2062 } 2063 job.TaskGroups[0].Update = canaryUpdate 2064 2065 job2 := job.Copy() 2066 job2.Version++ 2067 2068 d := structs.NewDeployment(job2) 2069 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2070 s := &structs.DeploymentState{ 2071 DesiredCanaries: 2, 2072 DesiredTotal: 5, 2073 } 2074 d.TaskGroups[job.TaskGroups[0].Name] = s 2075 2076 // Create 5 existing allocations 2077 var allocs []*structs.Allocation 2078 for i := 0; i < 5; i++ { 2079 alloc := mock.Alloc() 2080 alloc.Job = job 2081 alloc.JobID = job.ID 2082 alloc.NodeID = uuid.Generate() 2083 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2084 allocs = append(allocs, alloc) 2085 alloc.ClientStatus = structs.AllocClientStatusRunning 2086 } 2087 2088 // Create 2 healthy canary allocations 2089 for i := 0; i < 2; i++ { 2090 alloc := mock.Alloc() 2091 alloc.Job = job 2092 alloc.JobID = job.ID 2093 alloc.NodeID = uuid.Generate() 2094 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2095 alloc.ClientStatus = structs.AllocClientStatusRunning 2096 alloc.DeploymentID = d.ID 2097 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2098 Canary: true, 2099 Healthy: helper.BoolToPtr(false), 2100 } 2101 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2102 allocs = append(allocs, alloc) 2103 } 2104 2105 // Mark the canaries as failed 2106 allocs[5].ClientStatus = structs.AllocClientStatusFailed 2107 allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2108 2109 // Mark one of them as already rescheduled once 2110 allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 2111 {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), 2112 PrevAllocID: uuid.Generate(), 2113 PrevNodeID: uuid.Generate(), 2114 }, 2115 }} 2116 2117 allocs[6].TaskStates = 
map[string]*structs.TaskState{tgName: {State: "start", 2118 StartedAt: now.Add(-1 * time.Hour), 2119 FinishedAt: now.Add(-10 * time.Second)}} 2120 allocs[6].ClientStatus = structs.AllocClientStatusFailed 2121 allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2122 2123 // Create 4 unhealthy canary allocations that have already been replaced 2124 for i := 0; i < 4; i++ { 2125 alloc := mock.Alloc() 2126 alloc.Job = job 2127 alloc.JobID = job.ID 2128 alloc.NodeID = uuid.Generate() 2129 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 2130 alloc.ClientStatus = structs.AllocClientStatusFailed 2131 alloc.DeploymentID = d.ID 2132 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2133 Canary: true, 2134 Healthy: helper.BoolToPtr(false), 2135 } 2136 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2137 allocs = append(allocs, alloc) 2138 } 2139 2140 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "") 2141 reconciler.now = now 2142 r := reconciler.Compute() 2143 2144 // Verify that no follow up evals were created 2145 evals := r.desiredFollowupEvals[tgName] 2146 require.Nil(evals) 2147 2148 // Verify that one rescheduled alloc and one replacement for terminal alloc were placed 2149 assertResults(t, r, &resultExpectation{ 2150 createDeployment: nil, 2151 deploymentUpdates: nil, 2152 place: 2, 2153 inplace: 0, 2154 stop: 0, 2155 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2156 job.TaskGroups[0].Name: { 2157 Place: 2, 2158 Ignore: 9, 2159 }, 2160 }, 2161 }) 2162 2163 // Rescheduled allocs should have previous allocs 2164 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 2165 assertPlaceResultsHavePreviousAllocs(t, 2, r.place) 2166 assertPlacementsAreRescheduled(t, 2, r.place) 2167 } 2168 2169 // Tests rescheduling failed canary service allocations when one has reached its 2170 // reschedule limit 2171 func 
TestReconciler_RescheduleNow_Service_Canaries_Limit(t *testing.T) { 2172 require := require.New(t) 2173 2174 // Set desired 5 2175 job := mock.Job() 2176 job.TaskGroups[0].Count = 5 2177 tgName := job.TaskGroups[0].Name 2178 now := time.Now() 2179 2180 // Set up reschedule policy and update stanza 2181 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2182 Attempts: 1, 2183 Interval: 24 * time.Hour, 2184 Delay: 5 * time.Second, 2185 DelayFunction: "", 2186 MaxDelay: 1 * time.Hour, 2187 Unlimited: false, 2188 } 2189 job.TaskGroups[0].Update = canaryUpdate 2190 2191 job2 := job.Copy() 2192 job2.Version++ 2193 2194 d := structs.NewDeployment(job2) 2195 d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 2196 s := &structs.DeploymentState{ 2197 DesiredCanaries: 2, 2198 DesiredTotal: 5, 2199 } 2200 d.TaskGroups[job.TaskGroups[0].Name] = s 2201 2202 // Create 5 existing allocations 2203 var allocs []*structs.Allocation 2204 for i := 0; i < 5; i++ { 2205 alloc := mock.Alloc() 2206 alloc.Job = job 2207 alloc.JobID = job.ID 2208 alloc.NodeID = uuid.Generate() 2209 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2210 allocs = append(allocs, alloc) 2211 alloc.ClientStatus = structs.AllocClientStatusRunning 2212 } 2213 2214 // Create 2 unhealthy canary allocations 2215 for i := 0; i < 2; i++ { 2216 alloc := mock.Alloc() 2217 alloc.Job = job 2218 alloc.JobID = job.ID 2219 alloc.NodeID = uuid.Generate() 2220 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2221 alloc.ClientStatus = structs.AllocClientStatusRunning 2222 alloc.DeploymentID = d.ID 2223 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2224 Canary: true, 2225 Healthy: helper.BoolToPtr(false), 2226 } 2227 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2228 allocs = append(allocs, alloc) 2229 } 2230 2231 // Mark the canaries as failed 2232 allocs[5].ClientStatus = structs.AllocClientStatusFailed 2233 
allocs[5].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2234 2235 // Mark one of them as already rescheduled once 2236 allocs[5].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 2237 {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), 2238 PrevAllocID: uuid.Generate(), 2239 PrevNodeID: uuid.Generate(), 2240 }, 2241 }} 2242 2243 allocs[6].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 2244 StartedAt: now.Add(-1 * time.Hour), 2245 FinishedAt: now.Add(-10 * time.Second)}} 2246 allocs[6].ClientStatus = structs.AllocClientStatusFailed 2247 allocs[6].DesiredTransition.Reschedule = helper.BoolToPtr(true) 2248 2249 // Create 4 unhealthy canary allocations that have already been replaced 2250 for i := 0; i < 4; i++ { 2251 alloc := mock.Alloc() 2252 alloc.Job = job 2253 alloc.JobID = job.ID 2254 alloc.NodeID = uuid.Generate() 2255 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%2)) 2256 alloc.ClientStatus = structs.AllocClientStatusFailed 2257 alloc.DeploymentID = d.ID 2258 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 2259 Canary: true, 2260 Healthy: helper.BoolToPtr(false), 2261 } 2262 s.PlacedCanaries = append(s.PlacedCanaries, alloc.ID) 2263 allocs = append(allocs, alloc) 2264 } 2265 2266 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job2, d, allocs, nil, "") 2267 reconciler.now = now 2268 r := reconciler.Compute() 2269 2270 // Verify that no follow up evals were created 2271 evals := r.desiredFollowupEvals[tgName] 2272 require.Nil(evals) 2273 2274 // Verify that only one alloc was placed: the failed canary that has not used up its single reschedule attempt 2275 assertResults(t, r, &resultExpectation{ 2276 createDeployment: nil, 2277 deploymentUpdates: nil, 2278 place: 1, 2279 inplace: 0, 2280 stop: 0, 2281 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2282 job.TaskGroups[0].Name: { 2283 Place: 1, 2284 Ignore: 10, 2285 }, 2286 }, 2287 }) 
2288 2289 // Rescheduled allocs should have previous allocs 2290 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2291 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 2292 assertPlacementsAreRescheduled(t, 1, r.place) 2293 } 2294 2295 // Tests failed service allocations that were already rescheduled won't be rescheduled again 2296 func TestReconciler_DontReschedule_PreviouslyRescheduled(t *testing.T) { 2297 // Set desired 5 2298 job := mock.Job() 2299 job.TaskGroups[0].Count = 5 2300 2301 // Set up reschedule policy 2302 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 5, Interval: 24 * time.Hour} 2303 2304 // Create 7 existing allocations 2305 var allocs []*structs.Allocation 2306 for i := 0; i < 7; i++ { 2307 alloc := mock.Alloc() 2308 alloc.Job = job 2309 alloc.JobID = job.ID 2310 alloc.NodeID = uuid.Generate() 2311 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2312 allocs = append(allocs, alloc) 2313 alloc.ClientStatus = structs.AllocClientStatusRunning 2314 } 2315 // Mark two as failed and rescheduled 2316 allocs[0].ClientStatus = structs.AllocClientStatusFailed 2317 allocs[0].ID = allocs[1].ID 2318 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2319 allocs[1].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 2320 {RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 2321 PrevAllocID: uuid.Generate(), 2322 PrevNodeID: uuid.Generate(), 2323 }, 2324 }} 2325 allocs[1].NextAllocation = allocs[2].ID 2326 2327 // Mark one as desired state stop 2328 allocs[4].DesiredStatus = structs.AllocDesiredStatusStop 2329 2330 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 2331 r := reconciler.Compute() 2332 2333 // Should place 1 - one is a new placement to make up the desired count of 5 2334 // failing allocs are not rescheduled 2335 assertResults(t, r, &resultExpectation{ 2336 
createDeployment: nil, 2337 deploymentUpdates: nil, 2338 place: 1, 2339 inplace: 0, 2340 stop: 0, 2341 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2342 job.TaskGroups[0].Name: { 2343 Place: 1, 2344 Ignore: 4, 2345 }, 2346 }, 2347 }) 2348 2349 // name index 0 is used for the replacement because its 2350 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 2351 } 2352 2353 // Tests the reconciler cancels an old deployment when the job is being stopped 2354 func TestReconciler_CancelDeployment_JobStop(t *testing.T) { 2355 job := mock.Job() 2356 job.Stop = true 2357 2358 running := structs.NewDeployment(job) 2359 failed := structs.NewDeployment(job) 2360 failed.Status = structs.DeploymentStatusFailed 2361 2362 cases := []struct { 2363 name string 2364 job *structs.Job 2365 jobID, taskGroup string 2366 deployment *structs.Deployment 2367 cancel bool 2368 }{ 2369 { 2370 name: "stopped job, running deployment", 2371 job: job, 2372 jobID: job.ID, 2373 taskGroup: job.TaskGroups[0].Name, 2374 deployment: running, 2375 cancel: true, 2376 }, 2377 { 2378 name: "nil job, running deployment", 2379 job: nil, 2380 jobID: "foo", 2381 taskGroup: "bar", 2382 deployment: running, 2383 cancel: true, 2384 }, 2385 { 2386 name: "stopped job, failed deployment", 2387 job: job, 2388 jobID: job.ID, 2389 taskGroup: job.TaskGroups[0].Name, 2390 deployment: failed, 2391 cancel: false, 2392 }, 2393 { 2394 name: "nil job, failed deployment", 2395 job: nil, 2396 jobID: "foo", 2397 taskGroup: "bar", 2398 deployment: failed, 2399 cancel: false, 2400 }, 2401 } 2402 2403 for _, c := range cases { 2404 t.Run(c.name, func(t *testing.T) { 2405 // Create 10 allocations 2406 var allocs []*structs.Allocation 2407 for i := 0; i < 10; i++ { 2408 alloc := mock.Alloc() 2409 alloc.Job = c.job 2410 alloc.JobID = c.jobID 2411 alloc.NodeID = uuid.Generate() 2412 alloc.Name = structs.AllocName(c.jobID, c.taskGroup, uint(i)) 2413 alloc.TaskGroup = c.taskGroup 2414 allocs = append(allocs, 
alloc) 2415 } 2416 2417 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, c.jobID, c.job, c.deployment, allocs, nil, "") 2418 r := reconciler.Compute() 2419 2420 var updates []*structs.DeploymentStatusUpdate 2421 if c.cancel { 2422 updates = []*structs.DeploymentStatusUpdate{ 2423 { 2424 DeploymentID: c.deployment.ID, 2425 Status: structs.DeploymentStatusCancelled, 2426 StatusDescription: structs.DeploymentStatusDescriptionStoppedJob, 2427 }, 2428 } 2429 } 2430 2431 // Assert the correct results 2432 assertResults(t, r, &resultExpectation{ 2433 createDeployment: nil, 2434 deploymentUpdates: updates, 2435 place: 0, 2436 inplace: 0, 2437 stop: 10, 2438 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2439 c.taskGroup: { 2440 Stop: 10, 2441 }, 2442 }, 2443 }) 2444 2445 assertNamesHaveIndexes(t, intRange(0, 9), stopResultsToNames(r.stop)) 2446 }) 2447 } 2448 } 2449 2450 // Tests the reconciler cancels an old deployment when the job is updated 2451 func TestReconciler_CancelDeployment_JobUpdate(t *testing.T) { 2452 // Create a base job 2453 job := mock.Job() 2454 2455 // Create two deployments 2456 running := structs.NewDeployment(job) 2457 failed := structs.NewDeployment(job) 2458 failed.Status = structs.DeploymentStatusFailed 2459 2460 // Make the job newer than the deployment 2461 job.Version += 10 2462 2463 cases := []struct { 2464 name string 2465 deployment *structs.Deployment 2466 cancel bool 2467 }{ 2468 { 2469 name: "running deployment", 2470 deployment: running, 2471 cancel: true, 2472 }, 2473 { 2474 name: "failed deployment", 2475 deployment: failed, 2476 cancel: false, 2477 }, 2478 } 2479 2480 for _, c := range cases { 2481 t.Run(c.name, func(t *testing.T) { 2482 // Create 10 allocations 2483 var allocs []*structs.Allocation 2484 for i := 0; i < 10; i++ { 2485 alloc := mock.Alloc() 2486 alloc.Job = job 2487 alloc.JobID = job.ID 2488 alloc.NodeID = uuid.Generate() 2489 alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) 2490 alloc.TaskGroup = job.TaskGroups[0].Name 2491 allocs = append(allocs, alloc) 2492 } 2493 2494 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, c.deployment, allocs, nil, "") 2495 r := reconciler.Compute() 2496 2497 var updates []*structs.DeploymentStatusUpdate 2498 if c.cancel { 2499 updates = []*structs.DeploymentStatusUpdate{ 2500 { 2501 DeploymentID: c.deployment.ID, 2502 Status: structs.DeploymentStatusCancelled, 2503 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 2504 }, 2505 } 2506 } 2507 2508 // Assert the correct results 2509 assertResults(t, r, &resultExpectation{ 2510 createDeployment: nil, 2511 deploymentUpdates: updates, 2512 place: 0, 2513 inplace: 0, 2514 stop: 0, 2515 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2516 job.TaskGroups[0].Name: { 2517 Ignore: 10, 2518 }, 2519 }, 2520 }) 2521 }) 2522 } 2523 } 2524 2525 // Tests the reconciler creates a deployment and does a rolling upgrade with 2526 // destructive changes 2527 func TestReconciler_CreateDeployment_RollingUpgrade_Destructive(t *testing.T) { 2528 job := mock.Job() 2529 job.TaskGroups[0].Update = noCanaryUpdate 2530 2531 // Create 10 allocations from the old job 2532 var allocs []*structs.Allocation 2533 for i := 0; i < 10; i++ { 2534 alloc := mock.Alloc() 2535 alloc.Job = job 2536 alloc.JobID = job.ID 2537 alloc.NodeID = uuid.Generate() 2538 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2539 alloc.TaskGroup = job.TaskGroups[0].Name 2540 allocs = append(allocs, alloc) 2541 } 2542 2543 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 2544 r := reconciler.Compute() 2545 2546 d := structs.NewDeployment(job) 2547 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2548 DesiredTotal: 10, 2549 } 2550 2551 // Assert the correct results 2552 assertResults(t, r, &resultExpectation{ 
2553 createDeployment: d, 2554 deploymentUpdates: nil, 2555 destructive: 4, 2556 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2557 job.TaskGroups[0].Name: { 2558 DestructiveUpdate: 4, 2559 Ignore: 6, 2560 }, 2561 }, 2562 }) 2563 2564 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 2565 } 2566 2567 // Tests the reconciler creates a deployment for inplace updates 2568 func TestReconciler_CreateDeployment_RollingUpgrade_Inplace(t *testing.T) { 2569 jobOld := mock.Job() 2570 job := jobOld.Copy() 2571 job.Version++ 2572 job.TaskGroups[0].Update = noCanaryUpdate 2573 2574 // Create 10 allocations from the old job 2575 var allocs []*structs.Allocation 2576 for i := 0; i < 10; i++ { 2577 alloc := mock.Alloc() 2578 alloc.Job = jobOld 2579 alloc.JobID = job.ID 2580 alloc.NodeID = uuid.Generate() 2581 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2582 alloc.TaskGroup = job.TaskGroups[0].Name 2583 allocs = append(allocs, alloc) 2584 } 2585 2586 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnInplace, false, job.ID, job, nil, allocs, nil, "") 2587 r := reconciler.Compute() 2588 2589 d := structs.NewDeployment(job) 2590 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2591 DesiredTotal: 10, 2592 } 2593 2594 // Assert the correct results 2595 assertResults(t, r, &resultExpectation{ 2596 createDeployment: d, 2597 deploymentUpdates: nil, 2598 place: 0, 2599 inplace: 10, 2600 stop: 0, 2601 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2602 job.TaskGroups[0].Name: { 2603 InPlaceUpdate: 10, 2604 }, 2605 }, 2606 }) 2607 } 2608 2609 // Tests the reconciler creates a deployment when the job has a newer create index 2610 func TestReconciler_CreateDeployment_NewerCreateIndex(t *testing.T) { 2611 jobOld := mock.Job() 2612 job := jobOld.Copy() 2613 job.TaskGroups[0].Update = noCanaryUpdate 2614 job.CreateIndex += 100 2615 2616 // Create 5 allocations from the old job 2617 
var allocs []*structs.Allocation 2618 for i := 0; i < 5; i++ { 2619 alloc := mock.Alloc() 2620 alloc.Job = jobOld 2621 alloc.JobID = jobOld.ID 2622 alloc.NodeID = uuid.Generate() 2623 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2624 alloc.TaskGroup = job.TaskGroups[0].Name 2625 allocs = append(allocs, alloc) 2626 } 2627 2628 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 2629 r := reconciler.Compute() 2630 2631 d := structs.NewDeployment(job) 2632 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2633 DesiredTotal: 5, 2634 } 2635 2636 // Assert the correct results 2637 assertResults(t, r, &resultExpectation{ 2638 createDeployment: d, 2639 deploymentUpdates: nil, 2640 place: 5, 2641 destructive: 0, 2642 inplace: 0, 2643 stop: 0, 2644 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2645 job.TaskGroups[0].Name: { 2646 InPlaceUpdate: 0, 2647 Ignore: 5, 2648 Place: 5, 2649 DestructiveUpdate: 0, 2650 }, 2651 }, 2652 }) 2653 } 2654 2655 // Tests the reconciler doesn't create a deployment if there are no changes 2656 func TestReconciler_DontCreateDeployment_NoChanges(t *testing.T) { 2657 job := mock.Job() 2658 job.TaskGroups[0].Update = noCanaryUpdate 2659 2660 // Create 10 allocations from the job 2661 var allocs []*structs.Allocation 2662 for i := 0; i < 10; i++ { 2663 alloc := mock.Alloc() 2664 alloc.Job = job 2665 alloc.JobID = job.ID 2666 alloc.NodeID = uuid.Generate() 2667 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2668 alloc.TaskGroup = job.TaskGroups[0].Name 2669 allocs = append(allocs, alloc) 2670 } 2671 2672 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 2673 r := reconciler.Compute() 2674 2675 // Assert the correct results 2676 assertResults(t, r, &resultExpectation{ 2677 createDeployment: nil, 2678 deploymentUpdates: nil, 2679 place: 0, 2680 
inplace: 0, 2681 stop: 0, 2682 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2683 job.TaskGroups[0].Name: { 2684 DestructiveUpdate: 0, 2685 Ignore: 10, 2686 }, 2687 }, 2688 }) 2689 } 2690 2691 // Tests the reconciler doesn't place any more canaries when the deployment is 2692 // paused or failed 2693 func TestReconciler_PausedOrFailedDeployment_NoMoreCanaries(t *testing.T) { 2694 job := mock.Job() 2695 job.TaskGroups[0].Update = canaryUpdate 2696 2697 cases := []struct { 2698 name string 2699 deploymentStatus string 2700 stop uint64 2701 }{ 2702 { 2703 name: "paused deployment", 2704 deploymentStatus: structs.DeploymentStatusPaused, 2705 stop: 0, 2706 }, 2707 { 2708 name: "failed deployment", 2709 deploymentStatus: structs.DeploymentStatusFailed, 2710 stop: 1, 2711 }, 2712 } 2713 2714 for _, c := range cases { 2715 t.Run(c.name, func(t *testing.T) { 2716 // Create a deployment that is paused/failed and has placed some canaries 2717 d := structs.NewDeployment(job) 2718 d.Status = c.deploymentStatus 2719 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2720 Promoted: false, 2721 DesiredCanaries: 2, 2722 DesiredTotal: 10, 2723 PlacedAllocs: 1, 2724 } 2725 2726 // Create 10 allocations for the original job 2727 var allocs []*structs.Allocation 2728 for i := 0; i < 10; i++ { 2729 alloc := mock.Alloc() 2730 alloc.Job = job 2731 alloc.JobID = job.ID 2732 alloc.NodeID = uuid.Generate() 2733 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2734 alloc.TaskGroup = job.TaskGroups[0].Name 2735 allocs = append(allocs, alloc) 2736 } 2737 2738 // Create one canary 2739 canary := mock.Alloc() 2740 canary.Job = job 2741 canary.JobID = job.ID 2742 canary.NodeID = uuid.Generate() 2743 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 2744 canary.TaskGroup = job.TaskGroups[0].Name 2745 canary.DeploymentID = d.ID 2746 allocs = append(allocs, canary) 2747 d.TaskGroups[canary.TaskGroup].PlacedCanaries = []string{canary.ID} 
2748 2749 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{canary.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) 2750 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 2751 r := reconciler.Compute() 2752 2753 // Assert the correct results 2754 assertResults(t, r, &resultExpectation{ 2755 createDeployment: nil, 2756 deploymentUpdates: nil, 2757 place: 0, 2758 inplace: 0, 2759 stop: int(c.stop), 2760 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2761 job.TaskGroups[0].Name: { 2762 Ignore: 11 - c.stop, 2763 Stop: c.stop, 2764 }, 2765 }, 2766 }) 2767 }) 2768 } 2769 } 2770 2771 // Tests the reconciler doesn't place any more allocs when the deployment is 2772 // paused or failed 2773 func TestReconciler_PausedOrFailedDeployment_NoMorePlacements(t *testing.T) { 2774 job := mock.Job() 2775 job.TaskGroups[0].Update = noCanaryUpdate 2776 job.TaskGroups[0].Count = 15 2777 2778 cases := []struct { 2779 name string 2780 deploymentStatus string 2781 }{ 2782 { 2783 name: "paused deployment", 2784 deploymentStatus: structs.DeploymentStatusPaused, 2785 }, 2786 { 2787 name: "failed deployment", 2788 deploymentStatus: structs.DeploymentStatusFailed, 2789 }, 2790 } 2791 2792 for _, c := range cases { 2793 t.Run(c.name, func(t *testing.T) { 2794 // Create a deployment that is paused and has placed some canaries 2795 d := structs.NewDeployment(job) 2796 d.Status = c.deploymentStatus 2797 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2798 Promoted: false, 2799 DesiredTotal: 15, 2800 PlacedAllocs: 10, 2801 } 2802 2803 // Create 10 allocations for the new job 2804 var allocs []*structs.Allocation 2805 for i := 0; i < 10; i++ { 2806 alloc := mock.Alloc() 2807 alloc.Job = job 2808 alloc.JobID = job.ID 2809 alloc.NodeID = uuid.Generate() 2810 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2811 alloc.TaskGroup = job.TaskGroups[0].Name 2812 allocs = append(allocs, 
alloc) 2813 } 2814 2815 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 2816 r := reconciler.Compute() 2817 2818 // Assert the correct results 2819 assertResults(t, r, &resultExpectation{ 2820 createDeployment: nil, 2821 deploymentUpdates: nil, 2822 place: 0, 2823 inplace: 0, 2824 stop: 0, 2825 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2826 job.TaskGroups[0].Name: { 2827 Ignore: 10, 2828 }, 2829 }, 2830 }) 2831 }) 2832 } 2833 } 2834 2835 // Tests the reconciler doesn't do any more destructive updates when the 2836 // deployment is paused or failed 2837 func TestReconciler_PausedOrFailedDeployment_NoMoreDestructiveUpdates(t *testing.T) { 2838 job := mock.Job() 2839 job.TaskGroups[0].Update = noCanaryUpdate 2840 2841 cases := []struct { 2842 name string 2843 deploymentStatus string 2844 }{ 2845 { 2846 name: "paused deployment", 2847 deploymentStatus: structs.DeploymentStatusPaused, 2848 }, 2849 { 2850 name: "failed deployment", 2851 deploymentStatus: structs.DeploymentStatusFailed, 2852 }, 2853 } 2854 2855 for _, c := range cases { 2856 t.Run(c.name, func(t *testing.T) { 2857 // Create a deployment that is paused and has placed some canaries 2858 d := structs.NewDeployment(job) 2859 d.Status = c.deploymentStatus 2860 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 2861 Promoted: false, 2862 DesiredTotal: 10, 2863 PlacedAllocs: 1, 2864 } 2865 2866 // Create 9 allocations for the original job 2867 var allocs []*structs.Allocation 2868 for i := 1; i < 10; i++ { 2869 alloc := mock.Alloc() 2870 alloc.Job = job 2871 alloc.JobID = job.ID 2872 alloc.NodeID = uuid.Generate() 2873 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2874 alloc.TaskGroup = job.TaskGroups[0].Name 2875 allocs = append(allocs, alloc) 2876 } 2877 2878 // Create one for the new job 2879 newAlloc := mock.Alloc() 2880 newAlloc.Job = job 2881 newAlloc.JobID = job.ID 2882 newAlloc.NodeID = 
uuid.Generate() 2883 newAlloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, 0) 2884 newAlloc.TaskGroup = job.TaskGroups[0].Name 2885 newAlloc.DeploymentID = d.ID 2886 allocs = append(allocs, newAlloc) 2887 2888 mockUpdateFn := allocUpdateFnMock(map[string]allocUpdateType{newAlloc.ID: allocUpdateFnIgnore}, allocUpdateFnDestructive) 2889 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 2890 r := reconciler.Compute() 2891 2892 // Assert the correct results 2893 assertResults(t, r, &resultExpectation{ 2894 createDeployment: nil, 2895 deploymentUpdates: nil, 2896 place: 0, 2897 inplace: 0, 2898 stop: 0, 2899 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2900 job.TaskGroups[0].Name: { 2901 Ignore: 10, 2902 }, 2903 }, 2904 }) 2905 }) 2906 } 2907 } 2908 2909 // Tests the reconciler handles migrating a canary correctly on a draining node 2910 func TestReconciler_DrainNode_Canary(t *testing.T) { 2911 job := mock.Job() 2912 job.TaskGroups[0].Update = canaryUpdate 2913 2914 // Create a deployment that is paused and has placed some canaries 2915 d := structs.NewDeployment(job) 2916 s := &structs.DeploymentState{ 2917 Promoted: false, 2918 DesiredTotal: 10, 2919 DesiredCanaries: 2, 2920 PlacedAllocs: 2, 2921 } 2922 d.TaskGroups[job.TaskGroups[0].Name] = s 2923 2924 // Create 10 allocations from the old job 2925 var allocs []*structs.Allocation 2926 for i := 0; i < 10; i++ { 2927 alloc := mock.Alloc() 2928 alloc.Job = job 2929 alloc.JobID = job.ID 2930 alloc.NodeID = uuid.Generate() 2931 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2932 alloc.TaskGroup = job.TaskGroups[0].Name 2933 allocs = append(allocs, alloc) 2934 } 2935 2936 // Create two canaries for the new job 2937 handled := make(map[string]allocUpdateType) 2938 for i := 0; i < 2; i++ { 2939 // Create one canary 2940 canary := mock.Alloc() 2941 canary.Job = job 2942 canary.JobID = job.ID 2943 canary.NodeID = 
uuid.Generate() 2944 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 2945 canary.TaskGroup = job.TaskGroups[0].Name 2946 canary.DeploymentID = d.ID 2947 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 2948 allocs = append(allocs, canary) 2949 handled[canary.ID] = allocUpdateFnIgnore 2950 } 2951 2952 // Build a map of tainted nodes that contains the last canary 2953 tainted := make(map[string]*structs.Node, 1) 2954 n := mock.Node() 2955 n.ID = allocs[11].NodeID 2956 allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true) 2957 n.Drain = true 2958 tainted[n.ID] = n 2959 2960 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 2961 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 2962 r := reconciler.Compute() 2963 2964 // Assert the correct results 2965 assertResults(t, r, &resultExpectation{ 2966 createDeployment: nil, 2967 deploymentUpdates: nil, 2968 place: 1, 2969 inplace: 0, 2970 stop: 1, 2971 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 2972 job.TaskGroups[0].Name: { 2973 Canary: 1, 2974 Ignore: 11, 2975 }, 2976 }, 2977 }) 2978 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 2979 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 2980 } 2981 2982 // Tests the reconciler handles migrating a canary correctly on a lost node 2983 func TestReconciler_LostNode_Canary(t *testing.T) { 2984 job := mock.Job() 2985 job.TaskGroups[0].Update = canaryUpdate 2986 2987 // Create a deployment that is paused and has placed some canaries 2988 d := structs.NewDeployment(job) 2989 s := &structs.DeploymentState{ 2990 Promoted: false, 2991 DesiredTotal: 10, 2992 DesiredCanaries: 2, 2993 PlacedAllocs: 2, 2994 } 2995 d.TaskGroups[job.TaskGroups[0].Name] = s 2996 2997 // Create 10 allocations from the old job 2998 var allocs []*structs.Allocation 2999 for i := 0; i < 10; i++ { 3000 alloc := mock.Alloc() 3001 alloc.Job = 
job 3002 alloc.JobID = job.ID 3003 alloc.NodeID = uuid.Generate() 3004 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3005 alloc.TaskGroup = job.TaskGroups[0].Name 3006 allocs = append(allocs, alloc) 3007 } 3008 3009 // Create two canaries for the new job 3010 handled := make(map[string]allocUpdateType) 3011 for i := 0; i < 2; i++ { 3012 // Create one canary 3013 canary := mock.Alloc() 3014 canary.Job = job 3015 canary.JobID = job.ID 3016 canary.NodeID = uuid.Generate() 3017 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3018 canary.TaskGroup = job.TaskGroups[0].Name 3019 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3020 canary.DeploymentID = d.ID 3021 allocs = append(allocs, canary) 3022 handled[canary.ID] = allocUpdateFnIgnore 3023 } 3024 3025 // Build a map of tainted nodes that contains the last canary 3026 tainted := make(map[string]*structs.Node, 1) 3027 n := mock.Node() 3028 n.ID = allocs[11].NodeID 3029 n.Status = structs.NodeStatusDown 3030 tainted[n.ID] = n 3031 3032 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3033 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3034 r := reconciler.Compute() 3035 3036 // Assert the correct results 3037 assertResults(t, r, &resultExpectation{ 3038 createDeployment: nil, 3039 deploymentUpdates: nil, 3040 place: 1, 3041 inplace: 0, 3042 stop: 1, 3043 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3044 job.TaskGroups[0].Name: { 3045 Canary: 1, 3046 Ignore: 11, 3047 }, 3048 }, 3049 }) 3050 3051 assertNamesHaveIndexes(t, intRange(1, 1), stopResultsToNames(r.stop)) 3052 assertNamesHaveIndexes(t, intRange(1, 1), placeResultsToNames(r.place)) 3053 } 3054 3055 // Tests the reconciler handles stopping canaries from older deployments 3056 func TestReconciler_StopOldCanaries(t *testing.T) { 3057 job := mock.Job() 3058 job.TaskGroups[0].Update = canaryUpdate 3059 3060 // Create an 
old deployment that has placed some canaries 3061 d := structs.NewDeployment(job) 3062 s := &structs.DeploymentState{ 3063 Promoted: false, 3064 DesiredTotal: 10, 3065 DesiredCanaries: 2, 3066 PlacedAllocs: 2, 3067 } 3068 d.TaskGroups[job.TaskGroups[0].Name] = s 3069 3070 // Update the job 3071 job.Version += 10 3072 3073 // Create 10 allocations from the old job 3074 var allocs []*structs.Allocation 3075 for i := 0; i < 10; i++ { 3076 alloc := mock.Alloc() 3077 alloc.Job = job 3078 alloc.JobID = job.ID 3079 alloc.NodeID = uuid.Generate() 3080 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3081 alloc.TaskGroup = job.TaskGroups[0].Name 3082 allocs = append(allocs, alloc) 3083 } 3084 3085 // Create canaries 3086 for i := 0; i < 2; i++ { 3087 // Create one canary 3088 canary := mock.Alloc() 3089 canary.Job = job 3090 canary.JobID = job.ID 3091 canary.NodeID = uuid.Generate() 3092 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3093 canary.TaskGroup = job.TaskGroups[0].Name 3094 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3095 canary.DeploymentID = d.ID 3096 allocs = append(allocs, canary) 3097 } 3098 3099 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3100 r := reconciler.Compute() 3101 3102 newD := structs.NewDeployment(job) 3103 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3104 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3105 DesiredCanaries: 2, 3106 DesiredTotal: 10, 3107 } 3108 3109 // Assert the correct results 3110 assertResults(t, r, &resultExpectation{ 3111 createDeployment: newD, 3112 deploymentUpdates: []*structs.DeploymentStatusUpdate{ 3113 { 3114 DeploymentID: d.ID, 3115 Status: structs.DeploymentStatusCancelled, 3116 StatusDescription: structs.DeploymentStatusDescriptionNewerJob, 3117 }, 3118 }, 3119 place: 2, 3120 inplace: 0, 3121 stop: 2, 3122 desiredTGUpdates: 
map[string]*structs.DesiredUpdates{ 3123 job.TaskGroups[0].Name: { 3124 Canary: 2, 3125 Stop: 2, 3126 Ignore: 10, 3127 }, 3128 }, 3129 }) 3130 3131 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3132 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3133 } 3134 3135 // Tests the reconciler creates new canaries when the job changes 3136 func TestReconciler_NewCanaries(t *testing.T) { 3137 job := mock.Job() 3138 job.TaskGroups[0].Update = canaryUpdate 3139 3140 // Create 10 allocations from the old job 3141 var allocs []*structs.Allocation 3142 for i := 0; i < 10; i++ { 3143 alloc := mock.Alloc() 3144 alloc.Job = job 3145 alloc.JobID = job.ID 3146 alloc.NodeID = uuid.Generate() 3147 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3148 alloc.TaskGroup = job.TaskGroups[0].Name 3149 allocs = append(allocs, alloc) 3150 } 3151 3152 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3153 r := reconciler.Compute() 3154 3155 newD := structs.NewDeployment(job) 3156 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3157 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3158 DesiredCanaries: 2, 3159 DesiredTotal: 10, 3160 } 3161 3162 // Assert the correct results 3163 assertResults(t, r, &resultExpectation{ 3164 createDeployment: newD, 3165 deploymentUpdates: nil, 3166 place: 2, 3167 inplace: 0, 3168 stop: 0, 3169 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3170 job.TaskGroups[0].Name: { 3171 Canary: 2, 3172 Ignore: 10, 3173 }, 3174 }, 3175 }) 3176 3177 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3178 } 3179 3180 // Tests the reconciler creates new canaries when the job changes and the 3181 // canary count is greater than the task group count 3182 func TestReconciler_NewCanaries_CountGreater(t *testing.T) { 3183 job := mock.Job() 3184 
job.TaskGroups[0].Count = 3 3185 job.TaskGroups[0].Update = canaryUpdate.Copy() 3186 job.TaskGroups[0].Update.Canary = 7 3187 3188 // Create 3 allocations from the old job 3189 var allocs []*structs.Allocation 3190 for i := 0; i < 3; i++ { 3191 alloc := mock.Alloc() 3192 alloc.Job = job 3193 alloc.JobID = job.ID 3194 alloc.NodeID = uuid.Generate() 3195 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3196 alloc.TaskGroup = job.TaskGroups[0].Name 3197 allocs = append(allocs, alloc) 3198 } 3199 3200 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3201 r := reconciler.Compute() 3202 3203 newD := structs.NewDeployment(job) 3204 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3205 state := &structs.DeploymentState{ 3206 DesiredCanaries: 7, 3207 DesiredTotal: 3, 3208 } 3209 newD.TaskGroups[job.TaskGroups[0].Name] = state 3210 3211 // Assert the correct results 3212 assertResults(t, r, &resultExpectation{ 3213 createDeployment: newD, 3214 deploymentUpdates: nil, 3215 place: 7, 3216 inplace: 0, 3217 stop: 0, 3218 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3219 job.TaskGroups[0].Name: { 3220 Canary: 7, 3221 Ignore: 3, 3222 }, 3223 }, 3224 }) 3225 3226 assertNamesHaveIndexes(t, intRange(0, 2, 3, 6), placeResultsToNames(r.place)) 3227 } 3228 3229 // Tests the reconciler creates new canaries when the job changes for multiple 3230 // task groups 3231 func TestReconciler_NewCanaries_MultiTG(t *testing.T) { 3232 job := mock.Job() 3233 job.TaskGroups[0].Update = canaryUpdate 3234 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 3235 job.TaskGroups[0].Name = "tg2" 3236 3237 // Create 10 allocations from the old job for each tg 3238 var allocs []*structs.Allocation 3239 for j := 0; j < 2; j++ { 3240 for i := 0; i < 10; i++ { 3241 alloc := mock.Alloc() 3242 alloc.Job = job 3243 alloc.JobID = job.ID 3244 alloc.NodeID = 
uuid.Generate() 3245 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) 3246 alloc.TaskGroup = job.TaskGroups[j].Name 3247 allocs = append(allocs, alloc) 3248 } 3249 } 3250 3251 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3252 r := reconciler.Compute() 3253 3254 newD := structs.NewDeployment(job) 3255 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3256 state := &structs.DeploymentState{ 3257 DesiredCanaries: 2, 3258 DesiredTotal: 10, 3259 } 3260 newD.TaskGroups[job.TaskGroups[0].Name] = state 3261 newD.TaskGroups[job.TaskGroups[1].Name] = state.Copy() 3262 3263 // Assert the correct results 3264 assertResults(t, r, &resultExpectation{ 3265 createDeployment: newD, 3266 deploymentUpdates: nil, 3267 place: 4, 3268 inplace: 0, 3269 stop: 0, 3270 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3271 job.TaskGroups[0].Name: { 3272 Canary: 2, 3273 Ignore: 10, 3274 }, 3275 job.TaskGroups[1].Name: { 3276 Canary: 2, 3277 Ignore: 10, 3278 }, 3279 }, 3280 }) 3281 3282 assertNamesHaveIndexes(t, intRange(0, 1, 0, 1), placeResultsToNames(r.place)) 3283 } 3284 3285 // Tests the reconciler creates new canaries when the job changes and scales up 3286 func TestReconciler_NewCanaries_ScaleUp(t *testing.T) { 3287 // Scale the job up to 15 3288 job := mock.Job() 3289 job.TaskGroups[0].Update = canaryUpdate 3290 job.TaskGroups[0].Count = 15 3291 3292 // Create 10 allocations from the old job 3293 var allocs []*structs.Allocation 3294 for i := 0; i < 10; i++ { 3295 alloc := mock.Alloc() 3296 alloc.Job = job 3297 alloc.JobID = job.ID 3298 alloc.NodeID = uuid.Generate() 3299 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3300 alloc.TaskGroup = job.TaskGroups[0].Name 3301 allocs = append(allocs, alloc) 3302 } 3303 3304 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, 
nil, "") 3305 r := reconciler.Compute() 3306 3307 newD := structs.NewDeployment(job) 3308 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3309 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3310 DesiredCanaries: 2, 3311 DesiredTotal: 15, 3312 } 3313 3314 // Assert the correct results 3315 assertResults(t, r, &resultExpectation{ 3316 createDeployment: newD, 3317 deploymentUpdates: nil, 3318 place: 2, 3319 inplace: 0, 3320 stop: 0, 3321 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3322 job.TaskGroups[0].Name: { 3323 Canary: 2, 3324 Ignore: 10, 3325 }, 3326 }, 3327 }) 3328 3329 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3330 } 3331 3332 // Tests the reconciler creates new canaries when the job changes and scales 3333 // down 3334 func TestReconciler_NewCanaries_ScaleDown(t *testing.T) { 3335 // Scale the job down to 5 3336 job := mock.Job() 3337 job.TaskGroups[0].Update = canaryUpdate 3338 job.TaskGroups[0].Count = 5 3339 3340 // Create 10 allocations from the old job 3341 var allocs []*structs.Allocation 3342 for i := 0; i < 10; i++ { 3343 alloc := mock.Alloc() 3344 alloc.Job = job 3345 alloc.JobID = job.ID 3346 alloc.NodeID = uuid.Generate() 3347 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3348 alloc.TaskGroup = job.TaskGroups[0].Name 3349 allocs = append(allocs, alloc) 3350 } 3351 3352 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 3353 r := reconciler.Compute() 3354 3355 newD := structs.NewDeployment(job) 3356 newD.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion 3357 newD.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3358 DesiredCanaries: 2, 3359 DesiredTotal: 5, 3360 } 3361 3362 // Assert the correct results 3363 assertResults(t, r, &resultExpectation{ 3364 createDeployment: newD, 3365 deploymentUpdates: nil, 3366 place: 2, 
3367 inplace: 0, 3368 stop: 5, 3369 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3370 job.TaskGroups[0].Name: { 3371 Canary: 2, 3372 Stop: 5, 3373 Ignore: 5, 3374 }, 3375 }, 3376 }) 3377 3378 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3379 assertNamesHaveIndexes(t, intRange(5, 9), stopResultsToNames(r.stop)) 3380 } 3381 3382 // Tests the reconciler handles filling the names of partially placed canaries 3383 func TestReconciler_NewCanaries_FillNames(t *testing.T) { 3384 job := mock.Job() 3385 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 3386 Canary: 4, 3387 MaxParallel: 2, 3388 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 3389 MinHealthyTime: 10 * time.Second, 3390 HealthyDeadline: 10 * time.Minute, 3391 } 3392 3393 // Create an existing deployment that has placed some canaries 3394 d := structs.NewDeployment(job) 3395 s := &structs.DeploymentState{ 3396 Promoted: false, 3397 DesiredTotal: 10, 3398 DesiredCanaries: 4, 3399 PlacedAllocs: 2, 3400 } 3401 d.TaskGroups[job.TaskGroups[0].Name] = s 3402 3403 // Create 10 allocations from the old job 3404 var allocs []*structs.Allocation 3405 for i := 0; i < 10; i++ { 3406 alloc := mock.Alloc() 3407 alloc.Job = job 3408 alloc.JobID = job.ID 3409 alloc.NodeID = uuid.Generate() 3410 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3411 alloc.TaskGroup = job.TaskGroups[0].Name 3412 allocs = append(allocs, alloc) 3413 } 3414 3415 // Create canaries but pick names at the ends 3416 for i := 0; i < 4; i += 3 { 3417 // Create one canary 3418 canary := mock.Alloc() 3419 canary.Job = job 3420 canary.JobID = job.ID 3421 canary.NodeID = uuid.Generate() 3422 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3423 canary.TaskGroup = job.TaskGroups[0].Name 3424 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3425 canary.DeploymentID = d.ID 3426 allocs = append(allocs, canary) 3427 } 3428 3429 reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 3430 r := reconciler.Compute() 3431 3432 // Assert the correct results 3433 assertResults(t, r, &resultExpectation{ 3434 createDeployment: nil, 3435 deploymentUpdates: nil, 3436 place: 2, 3437 inplace: 0, 3438 stop: 0, 3439 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3440 job.TaskGroups[0].Name: { 3441 Canary: 2, 3442 Ignore: 12, 3443 }, 3444 }, 3445 }) 3446 3447 assertNamesHaveIndexes(t, intRange(1, 2), placeResultsToNames(r.place)) 3448 } 3449 3450 // Tests the reconciler handles canary promotion by unblocking max_parallel 3451 func TestReconciler_PromoteCanaries_Unblock(t *testing.T) { 3452 job := mock.Job() 3453 job.TaskGroups[0].Update = canaryUpdate 3454 3455 // Create an existing deployment that has placed some canaries and mark them 3456 // promoted 3457 d := structs.NewDeployment(job) 3458 s := &structs.DeploymentState{ 3459 Promoted: true, 3460 DesiredTotal: 10, 3461 DesiredCanaries: 2, 3462 PlacedAllocs: 2, 3463 } 3464 d.TaskGroups[job.TaskGroups[0].Name] = s 3465 3466 // Create 10 allocations from the old job 3467 var allocs []*structs.Allocation 3468 for i := 0; i < 10; i++ { 3469 alloc := mock.Alloc() 3470 alloc.Job = job 3471 alloc.JobID = job.ID 3472 alloc.NodeID = uuid.Generate() 3473 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3474 alloc.TaskGroup = job.TaskGroups[0].Name 3475 allocs = append(allocs, alloc) 3476 } 3477 3478 // Create the canaries 3479 handled := make(map[string]allocUpdateType) 3480 for i := 0; i < 2; i++ { 3481 // Create one canary 3482 canary := mock.Alloc() 3483 canary.Job = job 3484 canary.JobID = job.ID 3485 canary.NodeID = uuid.Generate() 3486 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3487 canary.TaskGroup = job.TaskGroups[0].Name 3488 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3489 canary.DeploymentID = d.ID 3490 canary.DeploymentStatus = 
&structs.AllocDeploymentStatus{ 3491 Healthy: helper.BoolToPtr(true), 3492 } 3493 allocs = append(allocs, canary) 3494 handled[canary.ID] = allocUpdateFnIgnore 3495 } 3496 3497 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3498 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3499 r := reconciler.Compute() 3500 3501 // Assert the correct results 3502 assertResults(t, r, &resultExpectation{ 3503 createDeployment: nil, 3504 deploymentUpdates: nil, 3505 destructive: 2, 3506 stop: 2, 3507 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3508 job.TaskGroups[0].Name: { 3509 Stop: 2, 3510 DestructiveUpdate: 2, 3511 Ignore: 8, 3512 }, 3513 }, 3514 }) 3515 3516 assertNoCanariesStopped(t, d, r.stop) 3517 assertNamesHaveIndexes(t, intRange(2, 3), destructiveResultsToNames(r.destructiveUpdate)) 3518 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3519 } 3520 3521 // Tests the reconciler handles canary promotion when the canary count equals 3522 // the total correctly 3523 func TestReconciler_PromoteCanaries_CanariesEqualCount(t *testing.T) { 3524 job := mock.Job() 3525 job.TaskGroups[0].Update = canaryUpdate 3526 job.TaskGroups[0].Count = 2 3527 3528 // Create an existing deployment that has placed some canaries and mark them 3529 // promoted 3530 d := structs.NewDeployment(job) 3531 s := &structs.DeploymentState{ 3532 Promoted: true, 3533 DesiredTotal: 2, 3534 DesiredCanaries: 2, 3535 PlacedAllocs: 2, 3536 HealthyAllocs: 2, 3537 } 3538 d.TaskGroups[job.TaskGroups[0].Name] = s 3539 3540 // Create 2 allocations from the old job 3541 var allocs []*structs.Allocation 3542 for i := 0; i < 2; i++ { 3543 alloc := mock.Alloc() 3544 alloc.Job = job 3545 alloc.JobID = job.ID 3546 alloc.NodeID = uuid.Generate() 3547 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3548 alloc.TaskGroup = job.TaskGroups[0].Name 3549 allocs = append(allocs, alloc) 3550 } 3551 
3552 // Create the canaries 3553 handled := make(map[string]allocUpdateType) 3554 for i := 0; i < 2; i++ { 3555 // Create one canary 3556 canary := mock.Alloc() 3557 canary.Job = job 3558 canary.JobID = job.ID 3559 canary.NodeID = uuid.Generate() 3560 canary.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3561 canary.TaskGroup = job.TaskGroups[0].Name 3562 s.PlacedCanaries = append(s.PlacedCanaries, canary.ID) 3563 canary.DeploymentID = d.ID 3564 canary.DeploymentStatus = &structs.AllocDeploymentStatus{ 3565 Healthy: helper.BoolToPtr(true), 3566 } 3567 allocs = append(allocs, canary) 3568 handled[canary.ID] = allocUpdateFnIgnore 3569 } 3570 3571 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3572 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3573 r := reconciler.Compute() 3574 3575 updates := []*structs.DeploymentStatusUpdate{ 3576 { 3577 DeploymentID: d.ID, 3578 Status: structs.DeploymentStatusSuccessful, 3579 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3580 }, 3581 } 3582 3583 // Assert the correct results 3584 assertResults(t, r, &resultExpectation{ 3585 createDeployment: nil, 3586 deploymentUpdates: updates, 3587 place: 0, 3588 inplace: 0, 3589 stop: 2, 3590 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3591 job.TaskGroups[0].Name: { 3592 Stop: 2, 3593 Ignore: 2, 3594 }, 3595 }, 3596 }) 3597 3598 assertNoCanariesStopped(t, d, r.stop) 3599 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3600 } 3601 3602 // Tests the reconciler checks the health of placed allocs to determine the 3603 // limit 3604 func TestReconciler_DeploymentLimit_HealthAccounting(t *testing.T) { 3605 job := mock.Job() 3606 job.TaskGroups[0].Update = noCanaryUpdate 3607 3608 cases := []struct { 3609 healthy int 3610 }{ 3611 { 3612 healthy: 0, 3613 }, 3614 { 3615 healthy: 1, 3616 }, 3617 { 3618 healthy: 2, 3619 }, 3620 { 3621 healthy: 3, 3622 
}, 3623 { 3624 healthy: 4, 3625 }, 3626 } 3627 3628 for _, c := range cases { 3629 t.Run(fmt.Sprintf("%d healthy", c.healthy), func(t *testing.T) { 3630 // Create an existing deployment that has placed some canaries and mark them 3631 // promoted 3632 d := structs.NewDeployment(job) 3633 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3634 Promoted: true, 3635 DesiredTotal: 10, 3636 PlacedAllocs: 4, 3637 } 3638 3639 // Create 6 allocations from the old job 3640 var allocs []*structs.Allocation 3641 for i := 4; i < 10; i++ { 3642 alloc := mock.Alloc() 3643 alloc.Job = job 3644 alloc.JobID = job.ID 3645 alloc.NodeID = uuid.Generate() 3646 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3647 alloc.TaskGroup = job.TaskGroups[0].Name 3648 allocs = append(allocs, alloc) 3649 } 3650 3651 // Create the new allocs 3652 handled := make(map[string]allocUpdateType) 3653 for i := 0; i < 4; i++ { 3654 new := mock.Alloc() 3655 new.Job = job 3656 new.JobID = job.ID 3657 new.NodeID = uuid.Generate() 3658 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3659 new.TaskGroup = job.TaskGroups[0].Name 3660 new.DeploymentID = d.ID 3661 if i < c.healthy { 3662 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3663 Healthy: helper.BoolToPtr(true), 3664 } 3665 } 3666 allocs = append(allocs, new) 3667 handled[new.ID] = allocUpdateFnIgnore 3668 } 3669 3670 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3671 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 3672 r := reconciler.Compute() 3673 3674 // Assert the correct results 3675 assertResults(t, r, &resultExpectation{ 3676 createDeployment: nil, 3677 deploymentUpdates: nil, 3678 destructive: c.healthy, 3679 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3680 job.TaskGroups[0].Name: { 3681 DestructiveUpdate: uint64(c.healthy), 3682 Ignore: uint64(10 - c.healthy), 3683 }, 3684 }, 3685 }) 3686 
3687 if c.healthy != 0 { 3688 assertNamesHaveIndexes(t, intRange(4, 3+c.healthy), destructiveResultsToNames(r.destructiveUpdate)) 3689 } 3690 }) 3691 } 3692 } 3693 3694 // Tests the reconciler handles an alloc on a tainted node during a rolling 3695 // update 3696 func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { 3697 job := mock.Job() 3698 job.TaskGroups[0].Update = noCanaryUpdate 3699 3700 // Create an existing deployment that has some placed allocs 3701 d := structs.NewDeployment(job) 3702 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3703 Promoted: true, 3704 DesiredTotal: 10, 3705 PlacedAllocs: 7, 3706 } 3707 3708 // Create 2 allocations from the old job 3709 var allocs []*structs.Allocation 3710 for i := 8; i < 10; i++ { 3711 alloc := mock.Alloc() 3712 alloc.Job = job 3713 alloc.JobID = job.ID 3714 alloc.NodeID = uuid.Generate() 3715 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3716 alloc.TaskGroup = job.TaskGroups[0].Name 3717 allocs = append(allocs, alloc) 3718 } 3719 3720 // Create the healthy replacements 3721 handled := make(map[string]allocUpdateType) 3722 for i := 0; i < 8; i++ { 3723 new := mock.Alloc() 3724 new.Job = job 3725 new.JobID = job.ID 3726 new.NodeID = uuid.Generate() 3727 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3728 new.TaskGroup = job.TaskGroups[0].Name 3729 new.DeploymentID = d.ID 3730 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3731 Healthy: helper.BoolToPtr(true), 3732 } 3733 allocs = append(allocs, new) 3734 handled[new.ID] = allocUpdateFnIgnore 3735 } 3736 3737 // Build a map of tainted nodes 3738 tainted := make(map[string]*structs.Node, 3) 3739 for i := 0; i < 3; i++ { 3740 n := mock.Node() 3741 n.ID = allocs[2+i].NodeID 3742 if i == 0 { 3743 n.Status = structs.NodeStatusDown 3744 } else { 3745 n.Drain = true 3746 allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true) 3747 } 3748 tainted[n.ID] = n 3749 } 3750 3751 
mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3752 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3753 r := reconciler.Compute() 3754 3755 // Assert the correct results 3756 assertResults(t, r, &resultExpectation{ 3757 createDeployment: nil, 3758 deploymentUpdates: nil, 3759 place: 3, 3760 destructive: 2, 3761 stop: 3, 3762 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3763 job.TaskGroups[0].Name: { 3764 Place: 1, // Place the lost 3765 Stop: 1, // Stop the lost 3766 Migrate: 2, // Migrate the tainted 3767 DestructiveUpdate: 2, 3768 Ignore: 5, 3769 }, 3770 }, 3771 }) 3772 3773 assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) 3774 assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) 3775 assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) 3776 } 3777 3778 // Tests the reconciler handles a failed deployment with allocs on tainted 3779 // nodes 3780 func TestReconciler_FailedDeployment_TaintedNodes(t *testing.T) { 3781 job := mock.Job() 3782 job.TaskGroups[0].Update = noCanaryUpdate 3783 3784 // Create an existing failed deployment that has some placed allocs 3785 d := structs.NewDeployment(job) 3786 d.Status = structs.DeploymentStatusFailed 3787 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3788 Promoted: true, 3789 DesiredTotal: 10, 3790 PlacedAllocs: 4, 3791 } 3792 3793 // Create 6 allocations from the old job 3794 var allocs []*structs.Allocation 3795 for i := 4; i < 10; i++ { 3796 alloc := mock.Alloc() 3797 alloc.Job = job 3798 alloc.JobID = job.ID 3799 alloc.NodeID = uuid.Generate() 3800 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3801 alloc.TaskGroup = job.TaskGroups[0].Name 3802 allocs = append(allocs, alloc) 3803 } 3804 3805 // Create the healthy replacements 3806 handled := make(map[string]allocUpdateType) 3807 for i := 0; i < 4; i++ { 3808 
new := mock.Alloc() 3809 new.Job = job 3810 new.JobID = job.ID 3811 new.NodeID = uuid.Generate() 3812 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3813 new.TaskGroup = job.TaskGroups[0].Name 3814 new.DeploymentID = d.ID 3815 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 3816 Healthy: helper.BoolToPtr(true), 3817 } 3818 allocs = append(allocs, new) 3819 handled[new.ID] = allocUpdateFnIgnore 3820 } 3821 3822 // Build a map of tainted nodes 3823 tainted := make(map[string]*structs.Node, 2) 3824 for i := 0; i < 2; i++ { 3825 n := mock.Node() 3826 n.ID = allocs[6+i].NodeID 3827 if i == 0 { 3828 n.Status = structs.NodeStatusDown 3829 } else { 3830 n.Drain = true 3831 allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true) 3832 } 3833 tainted[n.ID] = n 3834 } 3835 3836 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 3837 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, tainted, "") 3838 r := reconciler.Compute() 3839 3840 // Assert the correct results 3841 assertResults(t, r, &resultExpectation{ 3842 createDeployment: nil, 3843 deploymentUpdates: nil, 3844 place: 2, 3845 inplace: 0, 3846 stop: 2, 3847 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3848 job.TaskGroups[0].Name: { 3849 Place: 1, 3850 Migrate: 1, 3851 Stop: 1, 3852 Ignore: 8, 3853 }, 3854 }, 3855 }) 3856 3857 assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) 3858 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 3859 } 3860 3861 // Tests the reconciler handles a run after a deployment is complete 3862 // successfully. 
3863 func TestReconciler_CompleteDeployment(t *testing.T) { 3864 job := mock.Job() 3865 job.TaskGroups[0].Update = canaryUpdate 3866 3867 d := structs.NewDeployment(job) 3868 d.Status = structs.DeploymentStatusSuccessful 3869 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3870 Promoted: true, 3871 DesiredTotal: 10, 3872 DesiredCanaries: 2, 3873 PlacedAllocs: 10, 3874 HealthyAllocs: 10, 3875 } 3876 3877 // Create allocations from the old job 3878 var allocs []*structs.Allocation 3879 for i := 0; i < 10; i++ { 3880 alloc := mock.Alloc() 3881 alloc.Job = job 3882 alloc.JobID = job.ID 3883 alloc.NodeID = uuid.Generate() 3884 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 3885 alloc.TaskGroup = job.TaskGroups[0].Name 3886 alloc.DeploymentID = d.ID 3887 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 3888 Healthy: helper.BoolToPtr(true), 3889 } 3890 allocs = append(allocs, alloc) 3891 } 3892 3893 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 3894 r := reconciler.Compute() 3895 3896 // Assert the correct results 3897 assertResults(t, r, &resultExpectation{ 3898 createDeployment: nil, 3899 deploymentUpdates: nil, 3900 place: 0, 3901 inplace: 0, 3902 stop: 0, 3903 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3904 job.TaskGroups[0].Name: { 3905 Ignore: 10, 3906 }, 3907 }, 3908 }) 3909 } 3910 3911 // Tests that the reconciler marks a deployment as complete once there is 3912 // nothing left to place even if there are failed allocations that are part of 3913 // the deployment. 
3914 func TestReconciler_MarkDeploymentComplete_FailedAllocations(t *testing.T) { 3915 job := mock.Job() 3916 job.TaskGroups[0].Update = noCanaryUpdate 3917 3918 d := structs.NewDeployment(job) 3919 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 3920 DesiredTotal: 10, 3921 PlacedAllocs: 20, 3922 HealthyAllocs: 10, 3923 } 3924 3925 // Create 10 healthy allocs and 10 allocs that are failed 3926 var allocs []*structs.Allocation 3927 for i := 0; i < 20; i++ { 3928 alloc := mock.Alloc() 3929 alloc.Job = job 3930 alloc.JobID = job.ID 3931 alloc.NodeID = uuid.Generate() 3932 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i%10)) 3933 alloc.TaskGroup = job.TaskGroups[0].Name 3934 alloc.DeploymentID = d.ID 3935 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{} 3936 if i < 10 { 3937 alloc.ClientStatus = structs.AllocClientStatusRunning 3938 alloc.DeploymentStatus.Healthy = helper.BoolToPtr(true) 3939 } else { 3940 alloc.DesiredStatus = structs.AllocDesiredStatusStop 3941 alloc.ClientStatus = structs.AllocClientStatusFailed 3942 alloc.DeploymentStatus.Healthy = helper.BoolToPtr(false) 3943 } 3944 3945 allocs = append(allocs, alloc) 3946 } 3947 3948 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 3949 r := reconciler.Compute() 3950 3951 updates := []*structs.DeploymentStatusUpdate{ 3952 { 3953 DeploymentID: d.ID, 3954 Status: structs.DeploymentStatusSuccessful, 3955 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 3956 }, 3957 } 3958 3959 // Assert the correct results 3960 assertResults(t, r, &resultExpectation{ 3961 createDeployment: nil, 3962 deploymentUpdates: updates, 3963 place: 0, 3964 inplace: 0, 3965 stop: 0, 3966 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 3967 job.TaskGroups[0].Name: { 3968 Ignore: 10, 3969 }, 3970 }, 3971 }) 3972 } 3973 3974 // Test that a failed deployment cancels non-promoted canaries 3975 func 
TestReconciler_FailedDeployment_CancelCanaries(t *testing.T) { 3976 // Create a job with two task groups 3977 job := mock.Job() 3978 job.TaskGroups[0].Update = canaryUpdate 3979 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 3980 job.TaskGroups[1].Name = "two" 3981 3982 // Create an existing failed deployment that has promoted one task group 3983 d := structs.NewDeployment(job) 3984 d.Status = structs.DeploymentStatusFailed 3985 s0 := &structs.DeploymentState{ 3986 Promoted: true, 3987 DesiredTotal: 10, 3988 DesiredCanaries: 2, 3989 PlacedAllocs: 4, 3990 } 3991 s1 := &structs.DeploymentState{ 3992 Promoted: false, 3993 DesiredTotal: 10, 3994 DesiredCanaries: 2, 3995 PlacedAllocs: 2, 3996 } 3997 d.TaskGroups[job.TaskGroups[0].Name] = s0 3998 d.TaskGroups[job.TaskGroups[1].Name] = s1 3999 4000 // Create 6 allocations from the old job 4001 var allocs []*structs.Allocation 4002 handled := make(map[string]allocUpdateType) 4003 for _, group := range []int{0, 1} { 4004 replacements := 4 4005 state := s0 4006 if group == 1 { 4007 replacements = 2 4008 state = s1 4009 } 4010 4011 // Create the healthy replacements 4012 for i := 0; i < replacements; i++ { 4013 new := mock.Alloc() 4014 new.Job = job 4015 new.JobID = job.ID 4016 new.NodeID = uuid.Generate() 4017 new.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 4018 new.TaskGroup = job.TaskGroups[group].Name 4019 new.DeploymentID = d.ID 4020 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4021 Healthy: helper.BoolToPtr(true), 4022 } 4023 allocs = append(allocs, new) 4024 handled[new.ID] = allocUpdateFnIgnore 4025 4026 // Add the alloc to the canary list 4027 if i < 2 { 4028 state.PlacedCanaries = append(state.PlacedCanaries, new.ID) 4029 } 4030 } 4031 for i := replacements; i < 10; i++ { 4032 alloc := mock.Alloc() 4033 alloc.Job = job 4034 alloc.JobID = job.ID 4035 alloc.NodeID = uuid.Generate() 4036 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[group].Name, uint(i)) 
4037 alloc.TaskGroup = job.TaskGroups[group].Name 4038 allocs = append(allocs, alloc) 4039 } 4040 } 4041 4042 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 4043 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 4044 r := reconciler.Compute() 4045 4046 // Assert the correct results 4047 assertResults(t, r, &resultExpectation{ 4048 createDeployment: nil, 4049 deploymentUpdates: nil, 4050 place: 0, 4051 inplace: 0, 4052 stop: 2, 4053 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4054 job.TaskGroups[0].Name: { 4055 Ignore: 10, 4056 }, 4057 job.TaskGroups[1].Name: { 4058 Stop: 2, 4059 Ignore: 8, 4060 }, 4061 }, 4062 }) 4063 4064 assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) 4065 } 4066 4067 // Test that a failed deployment and updated job works 4068 func TestReconciler_FailedDeployment_NewJob(t *testing.T) { 4069 job := mock.Job() 4070 job.TaskGroups[0].Update = noCanaryUpdate 4071 4072 // Create an existing failed deployment that has some placed allocs 4073 d := structs.NewDeployment(job) 4074 d.Status = structs.DeploymentStatusFailed 4075 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4076 Promoted: true, 4077 DesiredTotal: 10, 4078 PlacedAllocs: 4, 4079 } 4080 4081 // Create 6 allocations from the old job 4082 var allocs []*structs.Allocation 4083 for i := 4; i < 10; i++ { 4084 alloc := mock.Alloc() 4085 alloc.Job = job 4086 alloc.JobID = job.ID 4087 alloc.NodeID = uuid.Generate() 4088 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4089 alloc.TaskGroup = job.TaskGroups[0].Name 4090 allocs = append(allocs, alloc) 4091 } 4092 4093 // Create the healthy replacements 4094 for i := 0; i < 4; i++ { 4095 new := mock.Alloc() 4096 new.Job = job 4097 new.JobID = job.ID 4098 new.NodeID = uuid.Generate() 4099 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4100 new.TaskGroup = job.TaskGroups[0].Name 4101 
new.DeploymentID = d.ID 4102 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4103 Healthy: helper.BoolToPtr(true), 4104 } 4105 allocs = append(allocs, new) 4106 } 4107 4108 // Up the job version 4109 jobNew := job.Copy() 4110 jobNew.Version += 100 4111 4112 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, jobNew, d, allocs, nil, "") 4113 r := reconciler.Compute() 4114 4115 dnew := structs.NewDeployment(jobNew) 4116 dnew.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4117 DesiredTotal: 10, 4118 } 4119 4120 // Assert the correct results 4121 assertResults(t, r, &resultExpectation{ 4122 createDeployment: dnew, 4123 deploymentUpdates: nil, 4124 destructive: 4, 4125 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4126 job.TaskGroups[0].Name: { 4127 DestructiveUpdate: 4, 4128 Ignore: 6, 4129 }, 4130 }, 4131 }) 4132 4133 assertNamesHaveIndexes(t, intRange(0, 3), destructiveResultsToNames(r.destructiveUpdate)) 4134 } 4135 4136 // Tests the reconciler marks a deployment as complete 4137 func TestReconciler_MarkDeploymentComplete(t *testing.T) { 4138 job := mock.Job() 4139 job.TaskGroups[0].Update = noCanaryUpdate 4140 4141 d := structs.NewDeployment(job) 4142 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4143 Promoted: true, 4144 DesiredTotal: 10, 4145 PlacedAllocs: 10, 4146 HealthyAllocs: 10, 4147 } 4148 4149 // Create allocations from the old job 4150 var allocs []*structs.Allocation 4151 for i := 0; i < 10; i++ { 4152 alloc := mock.Alloc() 4153 alloc.Job = job 4154 alloc.JobID = job.ID 4155 alloc.NodeID = uuid.Generate() 4156 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4157 alloc.TaskGroup = job.TaskGroups[0].Name 4158 alloc.DeploymentID = d.ID 4159 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{ 4160 Healthy: helper.BoolToPtr(true), 4161 } 4162 allocs = append(allocs, alloc) 4163 } 4164 4165 reconciler := 
NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, d, allocs, nil, "") 4166 r := reconciler.Compute() 4167 4168 updates := []*structs.DeploymentStatusUpdate{ 4169 { 4170 DeploymentID: d.ID, 4171 Status: structs.DeploymentStatusSuccessful, 4172 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 4173 }, 4174 } 4175 4176 // Assert the correct results 4177 assertResults(t, r, &resultExpectation{ 4178 createDeployment: nil, 4179 deploymentUpdates: updates, 4180 place: 0, 4181 inplace: 0, 4182 stop: 0, 4183 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4184 job.TaskGroups[0].Name: { 4185 Ignore: 10, 4186 }, 4187 }, 4188 }) 4189 } 4190 4191 // Tests the reconciler handles changing a job such that a deployment is created 4192 // while doing a scale up but as the second eval. 4193 func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { 4194 // Scale the job up to 15 4195 job := mock.Job() 4196 job.TaskGroups[0].Update = noCanaryUpdate 4197 job.TaskGroups[0].Count = 30 4198 4199 // Create a deployment that is paused and has placed some canaries 4200 d := structs.NewDeployment(job) 4201 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4202 Promoted: false, 4203 DesiredTotal: 30, 4204 PlacedAllocs: 20, 4205 } 4206 4207 // Create 10 allocations from the old job 4208 var allocs []*structs.Allocation 4209 for i := 0; i < 10; i++ { 4210 alloc := mock.Alloc() 4211 alloc.Job = job 4212 alloc.JobID = job.ID 4213 alloc.NodeID = uuid.Generate() 4214 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4215 alloc.TaskGroup = job.TaskGroups[0].Name 4216 allocs = append(allocs, alloc) 4217 } 4218 4219 // Create 20 from new job 4220 handled := make(map[string]allocUpdateType) 4221 for i := 10; i < 30; i++ { 4222 alloc := mock.Alloc() 4223 alloc.Job = job 4224 alloc.JobID = job.ID 4225 alloc.DeploymentID = d.ID 4226 alloc.NodeID = uuid.Generate() 4227 alloc.Name = structs.AllocName(job.ID, 
job.TaskGroups[0].Name, uint(i)) 4228 alloc.TaskGroup = job.TaskGroups[0].Name 4229 allocs = append(allocs, alloc) 4230 handled[alloc.ID] = allocUpdateFnIgnore 4231 } 4232 4233 mockUpdateFn := allocUpdateFnMock(handled, allocUpdateFnDestructive) 4234 reconciler := NewAllocReconciler(testlog.HCLogger(t), mockUpdateFn, false, job.ID, job, d, allocs, nil, "") 4235 r := reconciler.Compute() 4236 4237 // Assert the correct results 4238 assertResults(t, r, &resultExpectation{ 4239 createDeployment: nil, 4240 deploymentUpdates: nil, 4241 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4242 job.TaskGroups[0].Name: { 4243 // All should be ignored because nothing has been marked as 4244 // healthy. 4245 Ignore: 30, 4246 }, 4247 }, 4248 }) 4249 } 4250 4251 // Tests the reconciler doesn't stop allocations when doing a rolling upgrade 4252 // where the count of the old job allocs is < desired count. 4253 func TestReconciler_RollingUpgrade_MissingAllocs(t *testing.T) { 4254 job := mock.Job() 4255 job.TaskGroups[0].Update = noCanaryUpdate 4256 4257 // Create 7 allocations from the old job 4258 var allocs []*structs.Allocation 4259 for i := 0; i < 7; i++ { 4260 alloc := mock.Alloc() 4261 alloc.Job = job 4262 alloc.JobID = job.ID 4263 alloc.NodeID = uuid.Generate() 4264 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4265 alloc.TaskGroup = job.TaskGroups[0].Name 4266 allocs = append(allocs, alloc) 4267 } 4268 4269 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, nil, allocs, nil, "") 4270 r := reconciler.Compute() 4271 4272 d := structs.NewDeployment(job) 4273 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4274 DesiredTotal: 10, 4275 } 4276 4277 // Assert the correct results 4278 assertResults(t, r, &resultExpectation{ 4279 createDeployment: d, 4280 deploymentUpdates: nil, 4281 place: 3, 4282 destructive: 1, 4283 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4284 
job.TaskGroups[0].Name: { 4285 Place: 3, 4286 DestructiveUpdate: 1, 4287 Ignore: 6, 4288 }, 4289 }, 4290 }) 4291 4292 assertNamesHaveIndexes(t, intRange(7, 9), placeResultsToNames(r.place)) 4293 assertNamesHaveIndexes(t, intRange(0, 0), destructiveResultsToNames(r.destructiveUpdate)) 4294 } 4295 4296 // Tests that the reconciler handles rerunning a batch job in the case that the 4297 // allocations are from an older instance of the job. 4298 func TestReconciler_Batch_Rerun(t *testing.T) { 4299 job := mock.Job() 4300 job.Type = structs.JobTypeBatch 4301 job.TaskGroups[0].Update = nil 4302 4303 // Create 10 allocations from the old job and have them be complete 4304 var allocs []*structs.Allocation 4305 for i := 0; i < 10; i++ { 4306 alloc := mock.Alloc() 4307 alloc.Job = job 4308 alloc.JobID = job.ID 4309 alloc.NodeID = uuid.Generate() 4310 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4311 alloc.TaskGroup = job.TaskGroups[0].Name 4312 alloc.ClientStatus = structs.AllocClientStatusComplete 4313 alloc.DesiredStatus = structs.AllocDesiredStatusStop 4314 allocs = append(allocs, alloc) 4315 } 4316 4317 // Create a copy of the job that is "new" 4318 job2 := job.Copy() 4319 job2.CreateIndex++ 4320 4321 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, true, job2.ID, job2, nil, allocs, nil, "") 4322 r := reconciler.Compute() 4323 4324 // Assert the correct results 4325 assertResults(t, r, &resultExpectation{ 4326 createDeployment: nil, 4327 deploymentUpdates: nil, 4328 place: 10, 4329 destructive: 0, 4330 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4331 job.TaskGroups[0].Name: { 4332 Place: 10, 4333 DestructiveUpdate: 0, 4334 Ignore: 10, 4335 }, 4336 }, 4337 }) 4338 4339 assertNamesHaveIndexes(t, intRange(0, 9), placeResultsToNames(r.place)) 4340 } 4341 4342 // Test that a failed deployment will not result in rescheduling failed allocations 4343 func TestReconciler_FailedDeployment_DontReschedule(t *testing.T) { 
4344 job := mock.Job() 4345 job.TaskGroups[0].Update = noCanaryUpdate 4346 4347 tgName := job.TaskGroups[0].Name 4348 now := time.Now() 4349 // Create an existing failed deployment that has some placed allocs 4350 d := structs.NewDeployment(job) 4351 d.Status = structs.DeploymentStatusFailed 4352 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4353 Promoted: true, 4354 DesiredTotal: 5, 4355 PlacedAllocs: 4, 4356 } 4357 4358 // Create 4 allocations and mark two as failed 4359 var allocs []*structs.Allocation 4360 for i := 0; i < 4; i++ { 4361 alloc := mock.Alloc() 4362 alloc.Job = job 4363 alloc.JobID = job.ID 4364 alloc.NodeID = uuid.Generate() 4365 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4366 alloc.TaskGroup = job.TaskGroups[0].Name 4367 alloc.DeploymentID = d.ID 4368 allocs = append(allocs, alloc) 4369 } 4370 4371 //create some allocations that are reschedulable now 4372 allocs[2].ClientStatus = structs.AllocClientStatusFailed 4373 allocs[2].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 4374 StartedAt: now.Add(-1 * time.Hour), 4375 FinishedAt: now.Add(-10 * time.Second)}} 4376 4377 allocs[3].ClientStatus = structs.AllocClientStatusFailed 4378 allocs[3].TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 4379 StartedAt: now.Add(-1 * time.Hour), 4380 FinishedAt: now.Add(-10 * time.Second)}} 4381 4382 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 4383 r := reconciler.Compute() 4384 4385 // Assert that no rescheduled placements were created 4386 assertResults(t, r, &resultExpectation{ 4387 place: 0, 4388 createDeployment: nil, 4389 deploymentUpdates: nil, 4390 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4391 job.TaskGroups[0].Name: { 4392 Ignore: 2, 4393 }, 4394 }, 4395 }) 4396 } 4397 4398 // Test that a running deployment with failed allocs will not result in 4399 // rescheduling failed allocations 
unless they are marked as reschedulable. 4400 func TestReconciler_DeploymentWithFailedAllocs_DontReschedule(t *testing.T) { 4401 job := mock.Job() 4402 job.TaskGroups[0].Update = noCanaryUpdate 4403 tgName := job.TaskGroups[0].Name 4404 now := time.Now() 4405 4406 // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet 4407 d := structs.NewDeployment(job) 4408 d.Status = structs.DeploymentStatusRunning 4409 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4410 Promoted: false, 4411 DesiredTotal: 10, 4412 PlacedAllocs: 10, 4413 } 4414 4415 // Create 10 allocations 4416 var allocs []*structs.Allocation 4417 for i := 0; i < 10; i++ { 4418 alloc := mock.Alloc() 4419 alloc.Job = job 4420 alloc.JobID = job.ID 4421 alloc.NodeID = uuid.Generate() 4422 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4423 alloc.TaskGroup = job.TaskGroups[0].Name 4424 alloc.DeploymentID = d.ID 4425 alloc.ClientStatus = structs.AllocClientStatusFailed 4426 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "start", 4427 StartedAt: now.Add(-1 * time.Hour), 4428 FinishedAt: now.Add(-10 * time.Second)}} 4429 allocs = append(allocs, alloc) 4430 } 4431 4432 // Mark half of them as reschedulable 4433 for i := 0; i < 5; i++ { 4434 allocs[i].DesiredTransition.Reschedule = helper.BoolToPtr(true) 4435 } 4436 4437 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 4438 r := reconciler.Compute() 4439 4440 // Assert that no rescheduled placements were created 4441 assertResults(t, r, &resultExpectation{ 4442 place: 5, 4443 createDeployment: nil, 4444 deploymentUpdates: nil, 4445 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4446 job.TaskGroups[0].Name: { 4447 Place: 5, 4448 Ignore: 5, 4449 }, 4450 }, 4451 }) 4452 } 4453 4454 // Test that a failed deployment cancels non-promoted canaries 4455 func 
TestReconciler_FailedDeployment_AutoRevert_CancelCanaries(t *testing.T) { 4456 // Create a job 4457 job := mock.Job() 4458 job.TaskGroups[0].Count = 3 4459 job.TaskGroups[0].Update = &structs.UpdateStrategy{ 4460 Canary: 3, 4461 MaxParallel: 2, 4462 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 4463 MinHealthyTime: 10 * time.Second, 4464 HealthyDeadline: 10 * time.Minute, 4465 Stagger: 31 * time.Second, 4466 } 4467 4468 // Create v1 of the job 4469 jobv1 := job.Copy() 4470 jobv1.Version = 1 4471 jobv1.TaskGroups[0].Meta = map[string]string{"version": "1"} 4472 4473 // Create v2 of the job 4474 jobv2 := job.Copy() 4475 jobv2.Version = 2 4476 jobv2.TaskGroups[0].Meta = map[string]string{"version": "2"} 4477 4478 d := structs.NewDeployment(jobv2) 4479 state := &structs.DeploymentState{ 4480 Promoted: true, 4481 DesiredTotal: 3, 4482 PlacedAllocs: 3, 4483 HealthyAllocs: 3, 4484 } 4485 d.TaskGroups[job.TaskGroups[0].Name] = state 4486 4487 // Create the original 4488 var allocs []*structs.Allocation 4489 for i := 0; i < 3; i++ { 4490 new := mock.Alloc() 4491 new.Job = jobv2 4492 new.JobID = job.ID 4493 new.NodeID = uuid.Generate() 4494 new.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4495 new.TaskGroup = job.TaskGroups[0].Name 4496 new.DeploymentID = d.ID 4497 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4498 Healthy: helper.BoolToPtr(true), 4499 } 4500 new.ClientStatus = structs.AllocClientStatusRunning 4501 allocs = append(allocs, new) 4502 4503 } 4504 for i := 0; i < 3; i++ { 4505 new := mock.Alloc() 4506 new.Job = jobv1 4507 new.JobID = jobv1.ID 4508 new.NodeID = uuid.Generate() 4509 new.Name = structs.AllocName(jobv1.ID, jobv1.TaskGroups[0].Name, uint(i)) 4510 new.TaskGroup = job.TaskGroups[0].Name 4511 new.DeploymentID = uuid.Generate() 4512 new.DeploymentStatus = &structs.AllocDeploymentStatus{ 4513 Healthy: helper.BoolToPtr(false), 4514 } 4515 new.DesiredStatus = structs.AllocDesiredStatusStop 4516 new.ClientStatus = 
structs.AllocClientStatusFailed 4517 allocs = append(allocs, new) 4518 } 4519 4520 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, jobv2, d, allocs, nil, "") 4521 r := reconciler.Compute() 4522 4523 updates := []*structs.DeploymentStatusUpdate{ 4524 { 4525 DeploymentID: d.ID, 4526 Status: structs.DeploymentStatusSuccessful, 4527 StatusDescription: structs.DeploymentStatusDescriptionSuccessful, 4528 }, 4529 } 4530 4531 // Assert the correct results 4532 assertResults(t, r, &resultExpectation{ 4533 createDeployment: nil, 4534 deploymentUpdates: updates, 4535 place: 0, 4536 inplace: 0, 4537 stop: 0, 4538 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4539 job.TaskGroups[0].Name: { 4540 Stop: 0, 4541 InPlaceUpdate: 0, 4542 Ignore: 3, 4543 }, 4544 }, 4545 }) 4546 } 4547 4548 // Test that a successful deployment with failed allocs will result in 4549 // rescheduling failed allocations 4550 func TestReconciler_SuccessfulDeploymentWithFailedAllocs_Reschedule(t *testing.T) { 4551 job := mock.Job() 4552 job.TaskGroups[0].Update = noCanaryUpdate 4553 tgName := job.TaskGroups[0].Name 4554 now := time.Now() 4555 4556 // Mock deployment with failed allocs, but deployment watcher hasn't marked it as failed yet 4557 d := structs.NewDeployment(job) 4558 d.Status = structs.DeploymentStatusSuccessful 4559 d.TaskGroups[job.TaskGroups[0].Name] = &structs.DeploymentState{ 4560 Promoted: false, 4561 DesiredTotal: 10, 4562 PlacedAllocs: 10, 4563 } 4564 4565 // Create 10 allocations 4566 var allocs []*structs.Allocation 4567 for i := 0; i < 10; i++ { 4568 alloc := mock.Alloc() 4569 alloc.Job = job 4570 alloc.JobID = job.ID 4571 alloc.NodeID = uuid.Generate() 4572 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4573 alloc.TaskGroup = job.TaskGroups[0].Name 4574 alloc.DeploymentID = d.ID 4575 alloc.ClientStatus = structs.AllocClientStatusFailed 4576 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: 
"start", 4577 StartedAt: now.Add(-1 * time.Hour), 4578 FinishedAt: now.Add(-10 * time.Second)}} 4579 allocs = append(allocs, alloc) 4580 } 4581 4582 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnDestructive, false, job.ID, job, d, allocs, nil, "") 4583 r := reconciler.Compute() 4584 4585 // Assert that rescheduled placements were created 4586 assertResults(t, r, &resultExpectation{ 4587 place: 10, 4588 createDeployment: nil, 4589 deploymentUpdates: nil, 4590 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4591 job.TaskGroups[0].Name: { 4592 Place: 10, 4593 Ignore: 0, 4594 }, 4595 }, 4596 }) 4597 assertPlaceResultsHavePreviousAllocs(t, 10, r.place) 4598 } 4599 4600 // Tests force rescheduling a failed alloc that is past its reschedule limit 4601 func TestReconciler_ForceReschedule_Service(t *testing.T) { 4602 require := require.New(t) 4603 4604 // Set desired 5 4605 job := mock.Job() 4606 job.TaskGroups[0].Count = 5 4607 tgName := job.TaskGroups[0].Name 4608 4609 // Set up reschedule policy and update stanza 4610 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 4611 Attempts: 1, 4612 Interval: 24 * time.Hour, 4613 Delay: 5 * time.Second, 4614 DelayFunction: "", 4615 MaxDelay: 1 * time.Hour, 4616 Unlimited: false, 4617 } 4618 job.TaskGroups[0].Update = noCanaryUpdate 4619 4620 // Create 5 existing allocations 4621 var allocs []*structs.Allocation 4622 for i := 0; i < 5; i++ { 4623 alloc := mock.Alloc() 4624 alloc.Job = job 4625 alloc.JobID = job.ID 4626 alloc.NodeID = uuid.Generate() 4627 alloc.Name = structs.AllocName(job.ID, job.TaskGroups[0].Name, uint(i)) 4628 allocs = append(allocs, alloc) 4629 alloc.ClientStatus = structs.AllocClientStatusRunning 4630 } 4631 4632 // Mark one as failed and past its reschedule limit so not eligible to reschedule 4633 allocs[0].ClientStatus = structs.AllocClientStatusFailed 4634 allocs[0].RescheduleTracker = &structs.RescheduleTracker{Events: []*structs.RescheduleEvent{ 4635 
{RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(), 4636 PrevAllocID: uuid.Generate(), 4637 PrevNodeID: uuid.Generate(), 4638 }, 4639 }} 4640 4641 // Mark DesiredTransition ForceReschedule 4642 allocs[0].DesiredTransition = structs.DesiredTransition{ForceReschedule: helper.BoolToPtr(true)} 4643 4644 reconciler := NewAllocReconciler(testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, nil, "") 4645 r := reconciler.Compute() 4646 4647 // Verify that no follow up evals were created 4648 evals := r.desiredFollowupEvals[tgName] 4649 require.Nil(evals) 4650 4651 // Verify that one rescheduled alloc was created because of the forced reschedule 4652 assertResults(t, r, &resultExpectation{ 4653 createDeployment: nil, 4654 deploymentUpdates: nil, 4655 place: 1, 4656 inplace: 0, 4657 stop: 0, 4658 desiredTGUpdates: map[string]*structs.DesiredUpdates{ 4659 job.TaskGroups[0].Name: { 4660 Place: 1, 4661 Ignore: 4, 4662 }, 4663 }, 4664 }) 4665 4666 // Rescheduled allocs should have previous allocs 4667 assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place)) 4668 assertPlaceResultsHavePreviousAllocs(t, 1, r.place) 4669 assertPlacementsAreRescheduled(t, 1, r.place) 4670 }