github.com/hernad/nomad@v1.6.112/nomad/drainer/watch_jobs_test.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package drainer 5 6 import ( 7 "context" 8 "testing" 9 "time" 10 11 "github.com/shoenig/test" 12 "github.com/shoenig/test/must" 13 "github.com/stretchr/testify/require" 14 "golang.org/x/time/rate" 15 16 "github.com/hernad/nomad/ci" 17 "github.com/hernad/nomad/helper/pointer" 18 "github.com/hernad/nomad/helper/testlog" 19 "github.com/hernad/nomad/helper/uuid" 20 "github.com/hernad/nomad/nomad/mock" 21 "github.com/hernad/nomad/nomad/state" 22 "github.com/hernad/nomad/nomad/structs" 23 ) 24 25 func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) { 26 n1 := mock.Node() 27 n1.Name = "draining" 28 n1.DrainStrategy = &structs.DrainStrategy{ 29 DrainSpec: structs.DrainSpec{ 30 Deadline: time.Minute, 31 }, 32 ForceDeadline: time.Now().Add(time.Minute), 33 } 34 require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1)) 35 36 // Create a non-draining node 37 n2 := mock.Node() 38 n2.Name = "running" 39 require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2)) 40 return n1, n2 41 } 42 43 func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) { 44 t.Helper() 45 46 limiter := rate.NewLimiter(100.0, 100) 47 logger := testlog.HCLogger(t) 48 ctx, cancel := context.WithCancel(context.Background()) 49 w := NewDrainingJobWatcher(ctx, limiter, state, logger) 50 return w, cancel 51 } 52 53 // TestDrainingJobWatcher_Interface is a compile-time assertion that we 54 // implement the intended interface. 55 func TestDrainingJobWatcher_Interface(t *testing.T) { 56 ci.Parallel(t) 57 58 w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t)) 59 cancel() 60 var _ DrainingJobWatcher = w 61 } 62 63 // asertJobWatcherOps asserts a certain number of allocs are drained and/or 64 // migrated by the job watcher. 
65 func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) ( 66 *DrainRequest, []*structs.Allocation) { 67 t.Helper() 68 var ( 69 drains *DrainRequest 70 migrations []*structs.Allocation 71 drainsChecked, migrationsChecked bool 72 ) 73 for { 74 select { 75 case drains = <-jw.Drain(): 76 ids := make([]string, len(drains.Allocs)) 77 for i, a := range drains.Allocs { 78 ids[i] = a.JobID[:6] + ":" + a.ID[:6] 79 } 80 t.Logf("draining %d allocs: %v", len(ids), ids) 81 require.False(t, drainsChecked, "drains already received") 82 drainsChecked = true 83 require.Lenf(t, drains.Allocs, drained, 84 "expected %d drains but found %d", drained, len(drains.Allocs)) 85 case migrations = <-jw.Migrated(): 86 ids := make([]string, len(migrations)) 87 for i, a := range migrations { 88 ids[i] = a.JobID[:6] + ":" + a.ID[:6] 89 } 90 t.Logf("migrating %d allocs: %v", len(ids), ids) 91 require.False(t, migrationsChecked, "migrations already received") 92 migrationsChecked = true 93 require.Lenf(t, migrations, migrated, 94 "expected %d migrations but found %d", migrated, len(migrations)) 95 case <-time.After(10 * time.Millisecond): 96 if !drainsChecked && drained > 0 { 97 t.Fatalf("expected %d drains but none happened", drained) 98 } 99 if !migrationsChecked && migrated > 0 { 100 t.Fatalf("expected %d migrations but none happened", migrated) 101 } 102 return drains, migrations 103 } 104 } 105 } 106 107 // TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches 108 // allocation changes from multiple jobs. 
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
	ci.Parallel(t)

	store := state.TestStateStore(t)
	jobWatcher, cancelWatcher := testDrainingJobWatcher(t, store)
	defer cancelWatcher()
	drainingNode, runningNode := testNodes(t, store)

	// index is the state store index used for the next write; it is bumped
	// after every upsert below.
	var index uint64 = 101
	count := 8

	// newAlloc returns a mock alloc for the given job placed on the given node.
	newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
		a := mock.Alloc()
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = node.ID
		return a
	}

	// 2 jobs with count 8 (see count above), max parallel 3, all allocs
	// healthy on the draining node
	jnss := make([]structs.NamespacedID, 2)
	jobs := make([]*structs.Job, 2)
	for i := 0; i < 2; i++ {
		job := mock.Job()
		jobs[i] = job
		jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
		job.TaskGroups[0].Migrate.MaxParallel = 3
		job.TaskGroups[0].Count = count
		must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
		index++

		var allocs []*structs.Allocation
		for i := 0; i < count; i++ {
			a := newAlloc(drainingNode, job)
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: pointer.Of(true),
			}
			allocs = append(allocs, a)
		}

		must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
		index++
	}

	// Only register jobs with watcher after creating all data models as
	// once the watcher starts we need to track the index carefully for
	// updating the batch future
	jobWatcher.RegisterJobs(jnss)

	// Expect a first batch of MaxParallel allocs from each job (2 * 3 = 6)
	drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrating the drained allocs by starting new ones and stopping
	// the old ones
	drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	// Just setting DesiredTransition.Migrate should not cause any further
	// drains
	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// Proceed our fake migration along by creating new allocs and stopping
	// old ones
	replacements := make([]*structs.Allocation, len(drainedAllocs))
	updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		// Stop drained allocs
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		// Create a replacement
		replacement := mock.Alloc()
		replacement.JobID = a.Job.ID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID
		// start in pending state with no health status

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	// The drained allocs are only desired-stop at this point (their client
	// status is not yet complete), so no migrations are expected yet, and no
	// new drains are expected because the replacements have not started
	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// Client reports the stopped allocs as complete
	completeAllocs := make([]*structs.Allocation, len(drainedAllocs))
	for i, a := range drainedAllocs {
		a = a.Copy()
		a.ClientStatus = structs.AllocClientStatusComplete
		completeAllocs[i] = a
	}
	must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, index, completeAllocs))
	index++

	// The drained allocs completing on the client cause 6 migrations but no
	// new drains because the replacements have not started
	assertJobWatcherOps(t, jobWatcher, 0, 6)

	// Finally kickoff further drain activity by "starting" replacements
	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	must.MapNotEmpty(t, jobWatcher.drainingJobs())

	// 6 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// This round stops the old allocs and marks them client-complete in a
	// single upsert, alongside their replacements
	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop
		a.ClientStatus = structs.AllocClientStatusComplete

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 6)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	must.MapNotEmpty(t, jobWatcher.drainingJobs())

	// Final 4 new drains (2 jobs * 8 allocs = 16, minus the 12 already drained)
	drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop
		a.ClientStatus = structs.AllocClientStatusComplete

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 4)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))

	// No jobs should be left!
	must.MapEmpty(t, jobWatcher.drainingJobs())
}

// TestDrainingJobWatcher_HandleTaskGroup tests that the watcher handles
// allocation updates as expected.
323 func TestDrainingJobWatcher_HandleTaskGroup(t *testing.T) { 324 ci.Parallel(t) 325 326 testCases := []struct { 327 name string 328 batch bool // use a batch job 329 allocCount int // number of allocs in test (defaults to 10) 330 maxParallel int // max_parallel (defaults to 1) 331 332 // addAllocFn will be called allocCount times to create test allocs, 333 // and the allocs default to be healthy on the draining node 334 addAllocFn func(idx int, a *structs.Allocation, drainingID, runningID string) 335 336 expectDrained int 337 expectMigrated int 338 expectDone bool 339 }{ 340 { 341 // all allocs on draining node, should respect max_parallel=1 342 name: "drain-respects-max-parallel-1", 343 expectDrained: 1, 344 expectMigrated: 0, 345 expectDone: false, 346 }, 347 { 348 // allocs on a non-draining node, should not be drained 349 name: "allocs-on-non-draining-node-should-not-drain", 350 expectDrained: 0, 351 expectMigrated: 0, 352 expectDone: true, 353 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 354 a.NodeID = runningID 355 }, 356 }, 357 { 358 // even unhealthy allocs on a non-draining node should not be drained 359 name: "unhealthy-allocs-on-non-draining-node-should-not-drain", 360 expectDrained: 0, 361 expectMigrated: 0, 362 expectDone: false, 363 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 364 if i%2 == 0 { 365 a.NodeID = runningID 366 a.DeploymentStatus = nil 367 } 368 }, 369 }, 370 { 371 // only the alloc on draining node should be drained 372 name: "healthy-alloc-draining-node-should-drain", 373 expectDrained: 1, 374 expectMigrated: 0, 375 expectDone: false, 376 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 377 if i != 0 { 378 a.NodeID = runningID 379 } 380 }, 381 }, 382 { 383 // alloc that's still draining doesn't produce more result updates 384 name: "still-draining-alloc-no-new-updates", 385 expectDrained: 0, 386 expectMigrated: 0, 387 expectDone: false, 
388 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 389 if i == 0 { 390 a.DesiredTransition.Migrate = pointer.Of(true) 391 return 392 } 393 a.NodeID = runningID 394 }, 395 }, 396 { 397 // alloc that's finished draining gets marked as migrated 398 name: "client-terminal-alloc-drain-should-be-finished", 399 expectDrained: 0, 400 expectMigrated: 1, 401 expectDone: true, 402 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 403 if i == 0 { 404 a.DesiredStatus = structs.AllocDesiredStatusStop 405 a.ClientStatus = structs.AllocClientStatusComplete 406 return 407 } 408 a.NodeID = runningID 409 }, 410 }, 411 { 412 // batch alloc that's finished draining gets marked as migrated 413 name: "client-terminal-batch-alloc-drain-should-be-finished", 414 batch: true, 415 expectDrained: 0, 416 expectMigrated: 1, 417 expectDone: true, 418 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 419 if i == 0 { 420 a.DesiredStatus = structs.AllocDesiredStatusStop 421 a.ClientStatus = structs.AllocClientStatusComplete 422 return 423 } 424 a.NodeID = runningID 425 }, 426 }, 427 { 428 // all allocs are client-terminal, so nothing left to drain 429 name: "all-client-terminal-drain-should-be-finished", 430 expectDrained: 0, 431 expectMigrated: 10, 432 expectDone: true, 433 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 434 a.DesiredStatus = structs.AllocDesiredStatusStop 435 a.ClientStatus = structs.AllocClientStatusComplete 436 }, 437 }, 438 { 439 // all allocs are terminal, but only half are client-terminal 440 name: "half-client-terminal-drain-should-not-be-finished", 441 expectDrained: 0, 442 expectMigrated: 5, 443 expectDone: false, 444 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 445 a.DesiredStatus = structs.AllocDesiredStatusStop 446 if i%2 == 0 { 447 a.ClientStatus = structs.AllocClientStatusComplete 448 } 449 }, 450 }, 451 { 452 // 
All allocs are terminal, nothing to be drained 453 name: "all-terminal-batch", 454 batch: true, 455 expectDrained: 0, 456 expectMigrated: 10, 457 expectDone: true, 458 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 459 a.DesiredStatus = structs.AllocDesiredStatusStop 460 a.ClientStatus = structs.AllocClientStatusComplete 461 }, 462 }, 463 { 464 // with max_parallel=10, all allocs can be drained at once 465 name: "drain-respects-max-parallel-all-at-once", 466 expectDrained: 10, 467 expectMigrated: 0, 468 expectDone: false, 469 maxParallel: 10, 470 }, 471 { 472 // with max_parallel=2, up to 2 allocs can be drained at a time 473 name: "drain-respects-max-parallel-2", 474 expectDrained: 2, 475 expectMigrated: 0, 476 expectDone: false, 477 maxParallel: 2, 478 }, 479 { 480 // with max_parallel=2, up to 2 allocs can be drained at a time but 481 // we haven't yet informed the drainer that 1 has completed 482 // migrating 483 name: "notify-migrated-1-on-new-1-drained-1-draining", 484 expectDrained: 1, 485 expectMigrated: 1, 486 maxParallel: 2, 487 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 488 switch i { 489 case 0: 490 // One alloc on running node 491 a.NodeID = runningID 492 case 1: 493 // One alloc already migrated 494 a.DesiredStatus = structs.AllocDesiredStatusStop 495 a.ClientStatus = structs.AllocClientStatusComplete 496 } 497 }, 498 }, 499 { 500 // with max_parallel=2, up to 2 allocs can be drained at a time but 501 // we haven't yet informed the drainer that 1 has completed 502 // migrating 503 name: "notify-migrated-8-on-new-1-drained-1-draining", 504 expectDrained: 1, 505 expectMigrated: 1, 506 maxParallel: 2, 507 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 508 switch i { 509 case 0, 1, 2, 3, 4, 5, 6, 7: 510 a.NodeID = runningID 511 case 8: 512 a.DesiredStatus = structs.AllocDesiredStatusStop 513 a.ClientStatus = structs.AllocClientStatusComplete 514 } 515 }, 
516 }, 517 { 518 // 5 on new node, two drained, and three draining 519 // with max_parallel=5, up to 5 allocs can be drained at a time but 520 // we haven't yet informed the drainer that 2 have completed 521 // migrating 522 name: "notify-migrated-5-on-new-2-drained-3-draining", 523 expectDrained: 3, 524 expectMigrated: 2, 525 maxParallel: 5, 526 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 527 switch i { 528 case 0, 1, 2, 3, 4: 529 a.NodeID = runningID 530 case 8, 9: 531 a.DesiredStatus = structs.AllocDesiredStatusStop 532 a.ClientStatus = structs.AllocClientStatusComplete 533 } 534 }, 535 }, 536 { 537 // half the allocs have been moved to the new node but 1 doesn't 538 // have health set yet, so we should have MaxParallel - 1 in flight 539 name: "pending-health-blocks", 540 expectDrained: 1, 541 expectMigrated: 1, 542 maxParallel: 3, 543 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 544 switch i { 545 case 0: 546 // Deployment status UNset for 1 on new node 547 a.NodeID = runningID 548 a.DeploymentStatus = nil 549 case 1, 2, 3, 4: 550 // Deployment status set for 4 on new node 551 a.NodeID = runningID 552 case 9: 553 a.DesiredStatus = structs.AllocDesiredStatusStop 554 a.ClientStatus = structs.AllocClientStatusComplete 555 } 556 }, 557 }, 558 { 559 // half the allocs have been moved to the new node but 2 don't have 560 // health set yet, so we should have MaxParallel - 2 in flight 561 name: "pending-health-blocks-higher-max", 562 expectDrained: 2, 563 expectMigrated: 1, 564 maxParallel: 5, 565 addAllocFn: func(i int, a *structs.Allocation, drainingID, runningID string) { 566 switch i { 567 case 0, 1: 568 // Deployment status UNset for 2 on new node 569 a.NodeID = runningID 570 a.DeploymentStatus = nil 571 case 2, 3, 4: 572 // Deployment status set for 3 on new node 573 a.NodeID = runningID 574 case 9: 575 a.DesiredStatus = structs.AllocDesiredStatusStop 576 a.ClientStatus = 
structs.AllocClientStatusComplete 577 } 578 }, 579 }, 580 } 581 582 for _, tc := range testCases { 583 tc := tc 584 t.Run(tc.name, func(t *testing.T) { 585 ci.Parallel(t) 586 587 // Create nodes 588 store := state.TestStateStore(t) 589 drainingNode, runningNode := testNodes(t, store) 590 591 job := mock.Job() 592 if tc.batch { 593 job = mock.BatchJob() 594 } 595 job.TaskGroups[0].Count = 10 596 if tc.allocCount > 0 { 597 job.TaskGroups[0].Count = tc.allocCount 598 } 599 if tc.maxParallel > 0 { 600 job.TaskGroups[0].Migrate.MaxParallel = tc.maxParallel 601 } 602 must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, 102, nil, job)) 603 604 var allocs []*structs.Allocation 605 for i := 0; i < 10; i++ { 606 a := mock.Alloc() 607 if tc.batch { 608 a = mock.BatchAlloc() 609 } 610 a.JobID = job.ID 611 a.Job = job 612 a.TaskGroup = job.TaskGroups[0].Name 613 614 // Default to being healthy on the draining node 615 a.NodeID = drainingNode.ID 616 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 617 Healthy: pointer.Of(true), 618 } 619 if tc.addAllocFn != nil { 620 tc.addAllocFn(i, a, drainingNode.ID, runningNode.ID) 621 } 622 allocs = append(allocs, a) 623 } 624 625 must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs)) 626 snap, err := store.Snapshot() 627 must.NoError(t, err) 628 629 res := newJobResult() 630 must.NoError(t, handleTaskGroup(snap, tc.batch, job.TaskGroups[0], allocs, 102, res)) 631 test.Len(t, tc.expectDrained, res.drain, test.Sprint("expected drained allocs")) 632 test.Len(t, tc.expectMigrated, res.migrated, test.Sprint("expected migrated allocs")) 633 test.Eq(t, tc.expectDone, res.done) 634 }) 635 } 636 } 637 638 func TestHandleTaskGroup_Migrations(t *testing.T) { 639 ci.Parallel(t) 640 require := require.New(t) 641 642 // Create a draining node 643 state := state.TestStateStore(t) 644 n := mock.Node() 645 n.DrainStrategy = &structs.DrainStrategy{ 646 DrainSpec: structs.DrainSpec{ 647 Deadline: 5 * time.Minute, 648 }, 649 
ForceDeadline: time.Now().Add(1 * time.Minute), 650 } 651 require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n)) 652 653 job := mock.Job() 654 require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job)) 655 656 // Create 10 done allocs 657 var allocs []*structs.Allocation 658 for i := 0; i < 10; i++ { 659 a := mock.Alloc() 660 a.Job = job 661 a.TaskGroup = job.TaskGroups[0].Name 662 a.NodeID = n.ID 663 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 664 Healthy: pointer.Of(false), 665 } 666 667 if i%2 == 0 { 668 a.DesiredStatus = structs.AllocDesiredStatusStop 669 a.ClientStatus = structs.AllocClientStatusComplete 670 } else { 671 a.ClientStatus = structs.AllocClientStatusFailed 672 } 673 allocs = append(allocs, a) 674 } 675 require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs)) 676 677 snap, err := state.Snapshot() 678 require.Nil(err) 679 680 // Handle before and after indexes as both service and batch 681 res := newJobResult() 682 require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res)) 683 require.Empty(res.drain) 684 require.Len(res.migrated, 10) 685 require.True(res.done) 686 687 res = newJobResult() 688 require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res)) 689 require.Empty(res.drain) 690 require.Len(res.migrated, 10) 691 require.True(res.done) 692 693 res = newJobResult() 694 require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res)) 695 require.Empty(res.drain) 696 require.Empty(res.migrated) 697 require.True(res.done) 698 699 res = newJobResult() 700 require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res)) 701 require.Empty(res.drain) 702 require.Empty(res.migrated) 703 require.True(res.done) 704 } 705 706 // This test asserts that handle task group works when an allocation is on a 707 // garbage collected node 708 func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) { 709 ci.Parallel(t) 710 require := require.New(t) 711 
712 // Create a draining node 713 state := state.TestStateStore(t) 714 n := mock.Node() 715 n.DrainStrategy = &structs.DrainStrategy{ 716 DrainSpec: structs.DrainSpec{ 717 Deadline: 5 * time.Minute, 718 }, 719 ForceDeadline: time.Now().Add(1 * time.Minute), 720 } 721 require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n)) 722 723 job := mock.Job() 724 require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job)) 725 726 // Create 10 done allocs 727 var allocs []*structs.Allocation 728 for i := 0; i < 10; i++ { 729 a := mock.Alloc() 730 a.Job = job 731 a.TaskGroup = job.TaskGroups[0].Name 732 a.NodeID = n.ID 733 a.DeploymentStatus = &structs.AllocDeploymentStatus{ 734 Healthy: pointer.Of(false), 735 } 736 737 if i%2 == 0 { 738 a.DesiredStatus = structs.AllocDesiredStatusStop 739 a.ClientStatus = structs.AllocClientStatusComplete 740 } else { 741 a.ClientStatus = structs.AllocClientStatusFailed 742 } 743 allocs = append(allocs, a) 744 } 745 746 // Make the first one be on a GC'd node 747 allocs[0].NodeID = uuid.Generate() 748 require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs)) 749 750 snap, err := state.Snapshot() 751 require.Nil(err) 752 753 // Handle before and after indexes as both service and batch 754 res := newJobResult() 755 require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res)) 756 require.Empty(res.drain) 757 require.Len(res.migrated, 9) 758 require.True(res.done) 759 760 res = newJobResult() 761 require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res)) 762 require.Empty(res.drain) 763 require.Len(res.migrated, 9) 764 require.True(res.done) 765 766 res = newJobResult() 767 require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res)) 768 require.Empty(res.drain) 769 require.Empty(res.migrated) 770 require.True(res.done) 771 772 res = newJobResult() 773 require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res)) 774 require.Empty(res.drain) 775 
require.Empty(res.migrated) 776 require.True(res.done) 777 }