github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/drainer/watch_jobs_test.go

package drainer

import (
	"context"
	"testing"
	"time"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/time/rate"
)

func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
	n1 := mock.Node()
	n1.Name = "draining"
	n1.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: time.Minute,
		},
		ForceDeadline: time.Now().Add(time.Minute),
	}
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1))

	// Create a non-draining node
	n2 := mock.Node()
	n2.Name = "running"
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2))
	return n1, n2
}

func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
	t.Helper()

	limiter := rate.NewLimiter(100.0, 100)
	logger := testlog.HCLogger(t)
	ctx, cancel := context.WithCancel(context.Background())
	w := NewDrainingJobWatcher(ctx, limiter, state, logger)
	return w, cancel
}

// TestDrainingJobWatcher_Interface is a compile-time assertion that we
// implement the intended interface.
func TestDrainingJobWatcher_Interface(t *testing.T) {
	ci.Parallel(t)

	w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
	cancel()
	var _ DrainingJobWatcher = w
}

// assertJobWatcherOps asserts a certain number of allocs are drained and/or
// migrated by the job watcher.
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
	*DrainRequest, []*structs.Allocation) {
	t.Helper()
	var (
		drains                           *DrainRequest
		migrations                       []*structs.Allocation
		drainsChecked, migrationsChecked bool
	)
	for {
		select {
		case drains = <-jw.Drain():
			ids := make([]string, len(drains.Allocs))
			for i, a := range drains.Allocs {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("draining %d allocs: %v", len(ids), ids)
			require.False(t, drainsChecked, "drains already received")
			drainsChecked = true
			require.Lenf(t, drains.Allocs, drained,
				"expected %d drains but found %d", drained, len(drains.Allocs))
		case migrations = <-jw.Migrated():
			ids := make([]string, len(migrations))
			for i, a := range migrations {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("migrating %d allocs: %v", len(ids), ids)
			require.False(t, migrationsChecked, "migrations already received")
			migrationsChecked = true
			require.Lenf(t, migrations, migrated,
				"expected %d migrations but found %d", migrated, len(migrations))
		case <-time.After(10 * time.Millisecond):
			if !drainsChecked && drained > 0 {
				t.Fatalf("expected %d drains but none happened", drained)
			}
			if !migrationsChecked && migrated > 0 {
				t.Fatalf("expected %d migrations but none happened", migrated)
			}
			return drains, migrations
		}
	}
}
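// Usage sketch for assertJobWatcherOps (illustrative only; the counts and the
// index variable are hypothetical, and TestDrainingJobWatcher_DrainJobs below
// exercises the full pattern): callers alternate it with state updates and
// acknowledge the returned DrainRequest via its Resp so the watcher can make
// progress.
//
//	drains, _ := assertJobWatcherOps(t, jobWatcher, 3, 0) // expect 3 drains, no migrations yet
//	// mark drains.Allocs for migration, upsert them, then acknowledge the request
//	drains.Resp.Respond(index, nil)
//	// after the drained allocs are stopped and replacements are upserted:
//	assertJobWatcherOps(t, jobWatcher, 0, 3) // expect the 3 migrations
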
// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
// allocation changes from multiple jobs.
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	state := state.TestStateStore(t)
	jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
	defer cancelWatcher()
	drainingNode, runningNode := testNodes(t, state)

	var index uint64 = 101
	count := 8

	newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
		a := mock.Alloc()
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = node.ID
		return a
	}

	// 2 jobs with count 8, max parallel 3
	jnss := make([]structs.NamespacedID, 2)
	jobs := make([]*structs.Job, 2)
	for i := 0; i < 2; i++ {
		job := mock.Job()
		jobs[i] = job
		jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
		job.TaskGroups[0].Migrate.MaxParallel = 3
		job.TaskGroups[0].Count = count
		require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, index, job))
		index++

		var allocs []*structs.Allocation
		for i := 0; i < count; i++ {
			a := newAlloc(drainingNode, job)
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: pointer.Of(true),
			}
			allocs = append(allocs, a)
		}

		require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
		index++
	}

	// Only register jobs with the watcher after creating all data models, as
	// once the watcher starts we need to track the index carefully for
	// updating the batch future
	jobWatcher.RegisterJobs(jnss)

	// Expect a first batch of MaxParallel allocs from each job
	drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrating the drained allocs by starting new ones and stopping
	// the old ones
	drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	// Just setting ShouldMigrate should not cause any further drains
	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// Proceed our fake migration along by creating new allocs and stopping
	// old ones
	replacements := make([]*structs.Allocation, len(drainedAllocs))
	updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		// Stop drained allocs
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		// Create a replacement
		replacement := mock.Alloc()
		replacement.JobID = a.Job.ID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID
		// start in pending state with no health status

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	// Stopping the drained allocs causes migrations but no new drains
	// because the replacements have not started
	assertJobWatcherOps(t, jobWatcher, 0, 6)

	// Finally kick off further drain activity by "starting" replacements
	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// 6 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 6)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// Final 4 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 4)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))

	// No jobs should be left!
	require.Empty(jobWatcher.drainingJobs())
}

// DrainingJobWatcher tests:
// TODO Test that the watcher cancels its query when a new job is registered

// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup
//
// Two nodes will be initialized: one draining and one running.
type handleTaskGroupTestCase struct {
	// Name of test
	Name string

	// Batch uses a batch job and alloc
	Batch bool

	// Expectations
	ExpectedDrained  int
	ExpectedMigrated int
	ExpectedDone     bool

	// Count overrides the default count of 10 if set
	Count int

	// MaxParallel overrides the default max_parallel of 1 if set
	MaxParallel int

	// AddAlloc will be called 10 times to create test allocs
	//
	// Allocs default to be healthy on the draining node
	AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
}

func TestHandeTaskGroup_Table(t *testing.T) {
	ci.Parallel(t)

	cases := []handleTaskGroupTestCase{
		{
			// All allocs on draining node
			Name:             "AllDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
		},
		{
			// All allocs on non-draining node
			Name:             "AllNonDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.NodeID = runningID
			},
		},
		{
			// Some allocs on non-draining node but not healthy
			Name:             "SomeNonDrainingUnhealthy",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i%2 == 0 {
					a.NodeID = runningID
					a.DeploymentStatus = nil
				}
			},
		},
		{
			// One draining, other allocs on non-draining node and healthy
			Name:             "OneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i != 0 {
					a.NodeID = runningID
				}
			},
		},
		{
			// One already draining, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredTransition.Migrate = pointer.Of(true)
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrained",
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrainedBatched",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigrating",
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigratingBatch",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs may be drained at once
			Name:             "AllAtOnce",
			ExpectedDrained:  10,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      10,
		},
		{
			// Drain 2
			Name:             "Drain2",
			ExpectedDrained:  2,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      2,
		},
		{
			// One on new node, one drained, and one draining
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// One alloc on running node
					a.NodeID = runningID
				case 1:
					// One alloc already migrated
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 8 on new node, one drained, and one draining
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4, 5, 6, 7:
					a.NodeID = runningID
				case 8:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 on new node, two drained, and three draining
			ExpectedDrained:  3,
			ExpectedMigrated: 2,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4:
					a.NodeID = runningID
				case 8, 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// Not all on new node have health set
			Name:             "PendingHealth",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      3,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// Deployment status UNset for 1 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 1, 2, 3, 4:
					// Deployment status set for 4 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
			Name:             "PendingHealthHigherMax",
			ExpectedDrained:  2,
			ExpectedMigrated: 1,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1:
					// Deployment status UNset for 2 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 2, 3, 4:
					// Deployment status set for 3 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
	}

	for _, testCase := range cases {
		t.Run(testCase.Name, func(t *testing.T) {
			testHandleTaskGroup(t, testCase)
		})
	}
}

func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
	ci.Parallel(t)

	require := require.New(t)
	assert := assert.New(t)

	// Create nodes
	state := state.TestStateStore(t)
	drainingNode, runningNode := testNodes(t, state)

	job := mock.Job()
	if tc.Batch {
		job = mock.BatchJob()
	}
	job.TaskGroups[0].Count = 10
	if tc.Count > 0 {
		job.TaskGroups[0].Count = tc.Count
	}
	if tc.MaxParallel > 0 {
		job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
	}
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 102, job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		if tc.Batch {
			a = mock.BatchAlloc()
		}
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name

		// Default to being healthy on the draining node
		a.NodeID = drainingNode.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
		if tc.AddAlloc != nil {
			tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
		}
		allocs = append(allocs, a)
	}

	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs))
	snap, err := state.Snapshot()
	require.Nil(err)

	res := newJobResult()
	require.Nil(handleTaskGroup(snap, tc.Batch, job.TaskGroups[0], allocs, 102, res))
	assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
		tc.ExpectedDrained, len(res.drain))
	assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
		tc.ExpectedMigrated, len(res.migrated))
	assert.Equal(tc.ExpectedDone, res.done)
}

func TestHandleTaskGroup_Migrations(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}

// This test asserts that handle task group works when an allocation is on a
// garbage collected node
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}

	// Make the first one be on a GC'd node
	allocs[0].NodeID = uuid.Generate()
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}