github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/drainer/watch_jobs_test.go

package drainer

import (
	"context"
	"testing"
	"time"

	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/pointer"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/time/rate"
)

func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
	// Create a draining node
	n1 := mock.Node()
	n1.Name = "draining"
	n1.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: time.Minute,
		},
		ForceDeadline: time.Now().Add(time.Minute),
	}
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 100, n1))

	// Create a non-draining node
	n2 := mock.Node()
	n2.Name = "running"
	require.Nil(t, state.UpsertNode(structs.MsgTypeTestSetup, 101, n2))
	return n1, n2
}

func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
	t.Helper()

	limiter := rate.NewLimiter(100.0, 100)
	logger := testlog.HCLogger(t)
	ctx, cancel := context.WithCancel(context.Background())
	w := NewDrainingJobWatcher(ctx, limiter, state, logger)
	return w, cancel
}
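
// The limiter above permits bursts of up to 100 operations and refills at
// 100 tokens per second. A minimal standalone sketch of those semantics
// (illustrative only; not used by these tests):
//
//	l := rate.NewLimiter(100.0, 100)
//	for i := 0; i < 100; i++ {
//		_ = l.Allow() // the first 100 calls drain the burst and return true
//	}
//	busy := !l.Allow() // true until a token refills (~10ms at 100/sec)
//	_ = busy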

// TestDrainingJobWatcher_Interface is a compile-time assertion that we
// implement the intended interface.
func TestDrainingJobWatcher_Interface(t *testing.T) {
	ci.Parallel(t)

	w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
	cancel()
	var _ DrainingJobWatcher = w
}
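
// An equivalent compile-time assertion (sketch) can live at package scope
// without constructing a watcher:
//
//	var _ DrainingJobWatcher = (*drainingJobWatcher)(nil)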

// assertJobWatcherOps asserts that a certain number of allocs are drained
// and/or migrated by the job watcher.
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
	*DrainRequest, []*structs.Allocation) {
	t.Helper()
	var (
		drains                           *DrainRequest
		migrations                       []*structs.Allocation
		drainsChecked, migrationsChecked bool
	)
	for {
		select {
		case drains = <-jw.Drain():
			ids := make([]string, len(drains.Allocs))
			for i, a := range drains.Allocs {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("draining %d allocs: %v", len(ids), ids)
			require.False(t, drainsChecked, "drains already received")
			drainsChecked = true
			require.Lenf(t, drains.Allocs, drained,
				"expected %d drains but found %d", drained, len(drains.Allocs))
		case migrations = <-jw.Migrated():
			ids := make([]string, len(migrations))
			for i, a := range migrations {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("migrating %d allocs: %v", len(ids), ids)
			require.False(t, migrationsChecked, "migrations already received")
			migrationsChecked = true
			require.Lenf(t, migrations, migrated,
				"expected %d migrations but found %d", migrated, len(migrations))
		case <-time.After(10 * time.Millisecond):
			if !drainsChecked && drained > 0 {
				t.Fatalf("expected %d drains but none happened", drained)
			}
			if !migrationsChecked && migrated > 0 {
				t.Fatalf("expected %d migrations but none happened", migrated)
			}
			return drains, migrations
		}
	}
}
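
// Typical usage (sketch; assumes a watcher `jw` with registered jobs and a
// current state `index`): expect one MaxParallel-sized batch of drains and
// no migrations, then acknowledge the batch so the watcher unblocks.
//
//	drains, _ := assertJobWatcherOps(t, jw, 3, 0)
//	// ...upsert the drained allocs with DesiredTransition.Migrate set...
//	drains.Resp.Respond(index, nil)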

// TestDrainingJobWatcher_DrainJobs asserts that DrainingJobWatcher batches
// allocation changes from multiple jobs.
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	state := state.TestStateStore(t)
	jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
	defer cancelWatcher()
	drainingNode, runningNode := testNodes(t, state)

	var index uint64 = 101
	count := 8

	newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
		a := mock.Alloc()
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = node.ID
		return a
	}

	// 2 jobs with count 8, max parallel 3
	jnss := make([]structs.NamespacedID, 2)
	jobs := make([]*structs.Job, 2)
	for i := 0; i < 2; i++ {
		job := mock.Job()
		jobs[i] = job
		jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
		job.TaskGroups[0].Migrate.MaxParallel = 3
		job.TaskGroups[0].Count = count
		require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, index, job))
		index++

		var allocs []*structs.Allocation
		for i := 0; i < count; i++ {
			a := newAlloc(drainingNode, job)
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: pointer.Of(true),
			}
			allocs = append(allocs, a)
		}

		require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, allocs))
		index++
	}

	// Only register jobs with the watcher after creating all data models:
	// once the watcher starts, the index must be tracked carefully when
	// updating the batch future
	jobWatcher.RegisterJobs(jnss)

	// Expect a first batch of MaxParallel allocs from each job
	drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Begin a fake migration: mark the drained allocs as migrating; old
	// allocs are stopped and replacements created further below
	drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	// Just setting DesiredTransition.Migrate should not cause any further drains
	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// Advance our fake migration by stopping the drained allocs and
	// creating their replacements
	replacements := make([]*structs.Allocation, len(drainedAllocs))
	updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		// Stop drained allocs
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		// Create a replacement that starts in the pending state with no
		// health status
		replacement := mock.Alloc()
		replacement.JobID = a.Job.ID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	// Stopping the drained allocs causes migrations but no new drains
	// because the replacements have not started
	assertJobWatcherOps(t, jobWatcher, 0, 6)

	// Finally kick off further drain activity by "starting" replacements
	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// 6 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrations once more to unblock the next batch
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 6)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// Final 4 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 4)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, index, replacements))

	// No jobs should be left!
	require.Empty(jobWatcher.drainingJobs())
}
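
// The three rounds above repeat one cycle: acknowledge the drains, stop the
// drained allocs, and create pending replacements on the running node. A
// hypothetical helper (sketch only; not part of the original file) could
// factor those steps out:
func fakeMigrateAllocs(t *testing.T, store *state.StateStore, drains *DrainRequest, index *uint64, runningNode *structs.Node) []*structs.Allocation {
	t.Helper()

	// Mark the drained allocs as migrating and acknowledge the batch.
	drained := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = pointer.Of(true)
		drained[i] = a.Copy()
	}
	require.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, *index, drained))
	drains.Resp.Respond(*index, nil)
	*index++

	// Stop the drained allocs and create pending replacements.
	replacements := make([]*structs.Allocation, len(drained))
	updates := make([]*structs.Allocation, 0, len(drained)*2)
	for i, a := range drained {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := mock.Alloc()
		replacement.JobID = a.JobID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, *index, updates))
	*index++

	return replacements
}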

// DrainingJobWatcher tests:
// TODO Test that the watcher cancels its query when a new job is registered
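
// A hedged skeleton for the TODO above (hypothetical; the test name and
// assertion strategy are placeholders, not implemented):
//
//	func TestDrainingJobWatcher_QueryCancel(t *testing.T) {
//		ci.Parallel(t)
//		store := state.TestStateStore(t)
//		w, cancel := testDrainingJobWatcher(t, store)
//		defer cancel()
//		// Register a job while the watcher is blocked on its query; the
//		// watcher should cancel and re-issue the query so the new job is
//		// picked up without waiting for an unrelated state change.
//		w.RegisterJobs([]structs.NamespacedID{{Namespace: structs.DefaultNamespace, ID: uuid.Generate()}})
//	}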

// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup_Table.
//
// Two nodes will be initialized: one draining and one running.
type handleTaskGroupTestCase struct {
	// Name of test
	Name string

	// Batch uses a batch job and alloc
	Batch bool

	// Expectations
	ExpectedDrained  int
	ExpectedMigrated int
	ExpectedDone     bool

	// Count overrides the default count of 10 if set
	Count int

	// MaxParallel overrides the default max_parallel of 1 if set
	MaxParallel int

	// AddAlloc will be called 10 times to create test allocs
	//
	// Allocs default to being healthy on the draining node
	AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
}
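
// An illustrative case literal (mirrors the first table entry below): with
// the default count of 10 and max_parallel of 1, all-healthy allocs on the
// draining node yield exactly one drain and no migrations:
//
//	handleTaskGroupTestCase{
//		Name:            "AllDraining",
//		ExpectedDrained: 1,
//	}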

func TestHandleTaskGroup_Table(t *testing.T) {
	ci.Parallel(t)

	cases := []handleTaskGroupTestCase{
		{
			// All allocs on draining node
			Name:             "AllDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
		},
		{
			// All allocs on non-draining node
			Name:             "AllNonDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.NodeID = runningID
			},
		},
		{
			// Some allocs on non-draining node but not healthy
			Name:             "SomeNonDrainingUnhealthy",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i%2 == 0 {
					a.NodeID = runningID
					a.DeploymentStatus = nil
				}
			},
		},
		{
			// One draining, other allocs on non-draining node and healthy
			Name:             "OneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i != 0 {
					a.NodeID = runningID
				}
			},
		},
		{
			// One already draining, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredTransition.Migrate = pointer.Of(true)
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrained",
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrainedBatched",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// All allocs are terminal; nothing to drain
			Name:             "AllMigrating",
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs are terminal; nothing to drain
			Name:             "AllMigratingBatch",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs may be drained at once
			Name:             "AllAtOnce",
			ExpectedDrained:  10,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      10,
		},
		{
			// Drain 2
			Name:             "Drain2",
			ExpectedDrained:  2,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      2,
		},
		{
			// One on new node, one drained, and one draining
			Name:             "OneNewOneDrainedOneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// One alloc on running node
					a.NodeID = runningID
				case 1:
					// One alloc already migrated
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 8 on new node, one drained, and one draining
			Name:             "EightNewOneDrainedOneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4, 5, 6, 7:
					a.NodeID = runningID
				case 8:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 on new node, two drained, and three draining
			Name:             "FiveNewTwoDrainedThreeDraining",
			ExpectedDrained:  3,
			ExpectedMigrated: 2,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4:
					a.NodeID = runningID
				case 8, 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// Not all on new node have health set
			Name:             "PendingHealth",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      3,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// Deployment status UNset for 1 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 1, 2, 3, 4:
					// Deployment status set for 4 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
			Name:             "PendingHealthHigherMax",
			ExpectedDrained:  2,
			ExpectedMigrated: 1,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1:
					// Deployment status UNset for 2 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 2, 3, 4:
					// Deployment status set for 3 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
	}

	for _, testCase := range cases {
		t.Run(testCase.Name, func(t *testing.T) {
			testHandleTaskGroup(t, testCase)
		})
	}
}

func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
	ci.Parallel(t)

	require := require.New(t)
	assert := assert.New(t)

	// Create nodes
	state := state.TestStateStore(t)
	drainingNode, runningNode := testNodes(t, state)

	job := mock.Job()
	if tc.Batch {
		job = mock.BatchJob()
	}
	job.TaskGroups[0].Count = 10
	if tc.Count > 0 {
		job.TaskGroups[0].Count = tc.Count
	}
	if tc.MaxParallel > 0 {
		job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
	}
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 102, job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		if tc.Batch {
			a = mock.BatchAlloc()
		}
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name

		// Default to being healthy on the draining node
		a.NodeID = drainingNode.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(true),
		}
		if tc.AddAlloc != nil {
			tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
		}
		allocs = append(allocs, a)
	}

	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 103, allocs))
	snap, err := state.Snapshot()
	require.Nil(err)

	res := newJobResult()
	require.Nil(handleTaskGroup(snap, tc.Batch, job.TaskGroups[0], allocs, 102, res))
	assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
		tc.ExpectedDrained, len(res.drain))
	assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
		tc.ExpectedMigrated, len(res.migrated))
	assert.Equal(tc.ExpectedDone, res.done)
}

func TestHandleTaskGroup_Migrations(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after the alloc update index, as both service and
	// batch jobs
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}
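
// The four blocks above differ only in (batch, index); a hypothetical helper
// (sketch only, using the test's locals) could table-drive them:
//
//	assertTG := func(batch bool, index uint64, wantMigrated int) {
//		res := newJobResult()
//		require.Nil(handleTaskGroup(snap, batch, job.TaskGroups[0], allocs, index, res))
//		require.Empty(res.drain)
//		require.Len(res.migrated, wantMigrated)
//		require.True(res.done)
//	}
//	for _, batch := range []bool{false, true} {
//		assertTG(batch, 101, 10) // before the alloc updates: all migrated
//		assertTG(batch, 103, 0)  // after: nothing new to report
//	}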

// This test asserts that handleTaskGroup works when an allocation is on a
// garbage-collected node
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
	ci.Parallel(t)
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(structs.MsgTypeTestSetup, 100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(structs.MsgTypeTestSetup, 101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: pointer.Of(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}

	// Place the first alloc on a node that has since been GC'd
	allocs[0].NodeID = uuid.Generate()
	require.Nil(state.UpsertAllocs(structs.MsgTypeTestSetup, 102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after the alloc update index, as both service and
	// batch jobs
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}