github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/nomad/drainer/watch_jobs_test.go

package drainer

import (
	"context"
	"testing"
	"time"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/time/rate"
)

// testNodes creates a draining node and a running (non-draining) node in the
// given state store.
func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) {
	n1 := mock.Node()
	n1.Name = "draining"
	n1.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: time.Minute,
		},
		ForceDeadline: time.Now().Add(time.Minute),
	}
	require.Nil(t, state.UpsertNode(100, n1))

	// Create a non-draining node
	n2 := mock.Node()
	n2.Name = "running"
	require.Nil(t, state.UpsertNode(101, n2))
	return n1, n2
}

// testDrainingJobWatcher creates a drainingJobWatcher and returns it along
// with the cancel func for its context.
func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) {
	t.Helper()

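	// A generous rate limit (100 ops/sec, burst 100) so limiter waits never
	// stall the watcher's blocking queries during the test.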
	limiter := rate.NewLimiter(100.0, 100)
	logger := testlog.HCLogger(t)
	ctx, cancel := context.WithCancel(context.Background())
	w := NewDrainingJobWatcher(ctx, limiter, state, logger)
	return w, cancel
}

// TestDrainingJobWatcher_Interface is a compile-time assertion that we
// implement the intended interface.
func TestDrainingJobWatcher_Interface(t *testing.T) {
	w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t))
	cancel()
	var _ DrainingJobWatcher = w
}
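
// A build-time alternative is a package-level assertion, which fails at
// compile time rather than at test time; a minimal sketch, assuming
// drainingJobWatcher remains the concrete implementation:
//
//	var _ DrainingJobWatcher = (*drainingJobWatcher)(nil)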

// assertJobWatcherOps asserts a certain number of allocs are drained and/or
// migrated by the job watcher.
func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) (
	*DrainRequest, []*structs.Allocation) {
	t.Helper()
	var (
		drains                           *DrainRequest
		migrations                       []*structs.Allocation
		drainsChecked, migrationsChecked bool
	)
	for {
		select {
		case drains = <-jw.Drain():
			ids := make([]string, len(drains.Allocs))
			for i, a := range drains.Allocs {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("draining %d allocs: %v", len(ids), ids)
			require.False(t, drainsChecked, "drains already received")
			drainsChecked = true
			require.Lenf(t, drains.Allocs, drained,
				"expected %d drains but found %d", drained, len(drains.Allocs))
		case migrations = <-jw.Migrated():
			ids := make([]string, len(migrations))
			for i, a := range migrations {
				ids[i] = a.JobID[:6] + ":" + a.ID[:6]
			}
			t.Logf("migrating %d allocs: %v", len(ids), ids)
			require.False(t, migrationsChecked, "migrations already received")
			migrationsChecked = true
			require.Lenf(t, migrations, migrated,
				"expected %d migrations but found %d", migrated, len(migrations))
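		// Quiescence window: if neither channel fires within 10ms, assume
		// no further drains or migrations are coming.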
		case <-time.After(10 * time.Millisecond):
			if !drainsChecked && drained > 0 {
				t.Fatalf("expected %d drains but none happened", drained)
			}
			if !migrationsChecked && migrated > 0 {
				t.Fatalf("expected %d migrations but none happened", migrated)
			}
			return drains, migrations
		}
	}
}

// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches
// allocation changes from multiple jobs.
func TestDrainingJobWatcher_DrainJobs(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	state := state.TestStateStore(t)
	jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state)
	defer cancelWatcher()
	drainingNode, runningNode := testNodes(t, state)

	var index uint64 = 101
	count := 8

	newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation {
		a := mock.Alloc()
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = node.ID
		return a
	}

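	// Note: newAlloc leaves DeploymentStatus nil, so replacements created
	// below start out pending; healthy allocs must set it explicitly.
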
	// 2 jobs with count 8, max parallel 3
	jnss := make([]structs.NamespacedID, 2)
	jobs := make([]*structs.Job, 2)
	for i := 0; i < 2; i++ {
		job := mock.Job()
		jobs[i] = job
		jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
		job.TaskGroups[0].Migrate.MaxParallel = 3
		job.TaskGroups[0].Count = count
		require.Nil(state.UpsertJob(index, job))
		index++

		var allocs []*structs.Allocation
		for i := 0; i < count; i++ {
			a := newAlloc(drainingNode, job)
			a.DeploymentStatus = &structs.AllocDeploymentStatus{
				Healthy: helper.BoolToPtr(true),
			}
			allocs = append(allocs, a)
		}

		require.Nil(state.UpsertAllocs(index, allocs))
		index++
	}

	// Register jobs with the watcher only after creating all the data
	// models; once the watcher starts we must track the index carefully
	// when updating the batch future.
	jobWatcher.RegisterJobs(jnss)

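	// With MaxParallel=3 on 2 jobs of 8 allocs each, drains should arrive in
	// batches of 3*2=6, then 6, then the final 2*2=4.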
	// Expect a first batch of MaxParallel allocs from each job
	drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrating the drained allocs by starting new ones and stopping
	// the old ones
	drainedAllocs := make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = helper.BoolToPtr(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(index, drainedAllocs))
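	// Ack the drain request at the upsert's index so the watcher's batch
	// future is updated and it resumes watching from this index.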
	drains.Resp.Respond(index, nil)
	index++

	// Just setting ShouldMigrate should not cause any further drains
	assertJobWatcherOps(t, jobWatcher, 0, 0)

	// Move our fake migration along by creating new allocs and stopping
	// old ones
	replacements := make([]*structs.Allocation, len(drainedAllocs))
	updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		// Stop drained allocs
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		// Create a replacement
		replacement := mock.Alloc()
		replacement.JobID = a.Job.ID
		replacement.Job = a.Job
		replacement.TaskGroup = a.TaskGroup
		replacement.NodeID = runningNode.ID
		// start in pending state with no health status

		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(index, updates))
	index++

	// Stopping the drained allocs causes migrations but no new drains
	// because the replacements have not started yet
	assertJobWatcherOps(t, jobWatcher, 0, 6)

	// Finally kick off further drain activity by "starting" the replacements
	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
	}
	require.Nil(state.UpsertAllocs(index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// 6 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = helper.BoolToPtr(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 6)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
	}
	require.Nil(state.UpsertAllocs(index, replacements))
	index++

	require.NotEmpty(jobWatcher.drainingJobs())

	// Final 4 new drains
	drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0)

	// Fake migrations once more to finish the drain
	drainedAllocs = make([]*structs.Allocation, len(drains.Allocs))
	for i, a := range drains.Allocs {
		a.DesiredTransition.Migrate = helper.BoolToPtr(true)

		// create a copy so we can reuse this slice
		drainedAllocs[i] = a.Copy()
	}
	require.Nil(state.UpsertAllocs(index, drainedAllocs))
	drains.Resp.Respond(index, nil)
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 0)

	replacements = make([]*structs.Allocation, len(drainedAllocs))
	updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2)
	for i, a := range drainedAllocs {
		a.DesiredTransition.Migrate = nil
		a.DesiredStatus = structs.AllocDesiredStatusStop

		replacement := newAlloc(runningNode, a.Job)
		updates = append(updates, a, replacement)
		replacements[i] = replacement.Copy()
	}
	require.Nil(state.UpsertAllocs(index, updates))
	index++

	assertJobWatcherOps(t, jobWatcher, 0, 4)

	for _, a := range replacements {
		a.ClientStatus = structs.AllocClientStatusRunning
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
	}
	require.Nil(state.UpsertAllocs(index, replacements))

	// No jobs should be left!
	require.Empty(jobWatcher.drainingJobs())
}

// DrainingJobWatcher tests:
// TODO Test that the watcher cancels its query when a new job is registered

// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup
//
// Two nodes will be initialized: one draining and one running.
type handleTaskGroupTestCase struct {
	// Name of test
	Name string

	// Batch uses a batch job and alloc
	Batch bool

	// Expectations
	ExpectedDrained  int
	ExpectedMigrated int
	ExpectedDone     bool

	// Count overrides the default count of 10 if set
	Count int

	// MaxParallel overrides the default max_parallel of 1 if set
	MaxParallel int

	// AddAlloc will be called 10 times to create test allocs
	//
	// Allocs default to being healthy on the draining node
	AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string)
}

func TestHandleTaskGroup_Table(t *testing.T) {
	cases := []handleTaskGroupTestCase{
		{
			// All allocs on draining node
			Name:             "AllDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
		},
		{
			// All allocs on non-draining node
			Name:             "AllNonDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.NodeID = runningID
			},
		},
		{
			// Some allocs on non-draining node but not healthy
			Name:             "SomeNonDrainingUnhealthy",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i%2 == 0 {
					a.NodeID = runningID
					a.DeploymentStatus = nil
				}
			},
		},
		{
			// One draining, other allocs on non-draining node and healthy
			Name:             "OneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i != 0 {
					a.NodeID = runningID
				}
			},
		},
		{
			// One already draining, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDraining",
			ExpectedDrained:  0,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredTransition.Migrate = helper.BoolToPtr(true)
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrained",
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// One already drained, other allocs on non-draining node and healthy
			Name:             "OneAlreadyDrainedBatched",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 1,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				if i == 0 {
					a.DesiredStatus = structs.AllocDesiredStatusStop
					return
				}
				a.NodeID = runningID
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigrating",
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs are terminal, nothing to be drained
			Name:             "AllMigratingBatch",
			Batch:            true,
			ExpectedDrained:  0,
			ExpectedMigrated: 10,
			ExpectedDone:     true,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				a.DesiredStatus = structs.AllocDesiredStatusStop
			},
		},
		{
			// All allocs may be drained at once
			Name:             "AllAtOnce",
			ExpectedDrained:  10,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      10,
		},
		{
			// Drain 2
			Name:             "Drain2",
			ExpectedDrained:  2,
			ExpectedMigrated: 0,
			ExpectedDone:     false,
			MaxParallel:      2,
		},
		{
			// One on new node, one drained, and one draining
			Name:             "OneNewOneDrainedOneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// One alloc on running node
					a.NodeID = runningID
				case 1:
					// One alloc already migrated
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 8 on new node, one drained, and one draining
			Name:             "EightNewOneDrainedOneDraining",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      2,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4, 5, 6, 7:
					a.NodeID = runningID
				case 8:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 on new node, two drained, and three draining
			Name:             "FiveNewTwoDrainedThreeDraining",
			ExpectedDrained:  3,
			ExpectedMigrated: 2,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1, 2, 3, 4:
					a.NodeID = runningID
				case 8, 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// Not all on new node have health set
			Name:             "PendingHealth",
			ExpectedDrained:  1,
			ExpectedMigrated: 1,
			MaxParallel:      3,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0:
					// Deployment status UNset for 1 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 1, 2, 3, 4:
					// Deployment status set for 4 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
		{
			// 5 max parallel - 1 migrating - 2 with unset health = 2 drainable
			Name:             "PendingHealthHigherMax",
			ExpectedDrained:  2,
			ExpectedMigrated: 1,
			MaxParallel:      5,
			AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) {
				switch i {
				case 0, 1:
					// Deployment status UNset for 2 on new node
					a.NodeID = runningID
					a.DeploymentStatus = nil
				case 2, 3, 4:
					// Deployment status set for 3 on new node
					a.NodeID = runningID
				case 9:
					a.DesiredStatus = structs.AllocDesiredStatusStop
				}
			},
		},
	}

	for _, testCase := range cases {
		t.Run(testCase.Name, func(t *testing.T) {
			testHandleTaskGroup(t, testCase)
		})
	}
}

func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) {
	t.Parallel()
	require := require.New(t)
	assert := assert.New(t)

	// Create nodes
	state := state.TestStateStore(t)
	drainingNode, runningNode := testNodes(t, state)

	job := mock.Job()
	if tc.Batch {
		job = mock.BatchJob()
	}
	job.TaskGroups[0].Count = 10
	if tc.Count > 0 {
		job.TaskGroups[0].Count = tc.Count
	}
	if tc.MaxParallel > 0 {
		job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel
	}
	require.Nil(state.UpsertJob(102, job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		if tc.Batch {
			a = mock.BatchAlloc()
		}
		a.JobID = job.ID
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name

		// Default to being healthy on the draining node
		a.NodeID = drainingNode.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(true),
		}
		if tc.AddAlloc != nil {
			tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID)
		}
		allocs = append(allocs, a)
	}

	require.Nil(state.UpsertAllocs(103, allocs))
	snap, err := state.Snapshot()
	require.Nil(err)

	res := newJobResult()
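	// lastHandledIndex=102 is the job's upsert index; the allocs were
	// upserted at 103, so handleTaskGroup treats their states as new changes.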
	require.Nil(handleTaskGroup(snap, tc.Batch, job.TaskGroups[0], allocs, 102, res))
	assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d",
		tc.ExpectedDrained, len(res.drain))
	assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d",
		tc.ExpectedMigrated, len(res.migrated))
	assert.Equal(tc.ExpectedDone, res.done)
}

func TestHandleTaskGroup_Migrations(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}
	require.Nil(state.UpsertAllocs(102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
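	// At index 101 the allocs (upserted at 102) count as new changes, so all
	// 10 terminal allocs are reported as migrated; at 103 nothing has changed
	// since, so nothing is reported and the group is done.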
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 10)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}

// TestHandleTaskGroup_GarbageCollectedNode asserts that handleTaskGroup
// works when an allocation is on a garbage-collected node.
func TestHandleTaskGroup_GarbageCollectedNode(t *testing.T) {
	t.Parallel()
	require := require.New(t)

	// Create a draining node
	state := state.TestStateStore(t)
	n := mock.Node()
	n.DrainStrategy = &structs.DrainStrategy{
		DrainSpec: structs.DrainSpec{
			Deadline: 5 * time.Minute,
		},
		ForceDeadline: time.Now().Add(1 * time.Minute),
	}
	require.Nil(state.UpsertNode(100, n))

	job := mock.Job()
	require.Nil(state.UpsertJob(101, job))

	// Create 10 done allocs
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		a := mock.Alloc()
		a.Job = job
		a.TaskGroup = job.TaskGroups[0].Name
		a.NodeID = n.ID
		a.DeploymentStatus = &structs.AllocDeploymentStatus{
			Healthy: helper.BoolToPtr(false),
		}

		if i%2 == 0 {
			a.DesiredStatus = structs.AllocDesiredStatusStop
		} else {
			a.ClientStatus = structs.AllocClientStatusFailed
		}
		allocs = append(allocs, a)
	}

	// Make the first alloc be on a GC'd node
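	// That node no longer exists in the state store, so handleTaskGroup must
	// skip the alloc rather than fail, leaving 9 migrations below.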
	allocs[0].NodeID = uuid.Generate()
	require.Nil(state.UpsertAllocs(102, allocs))

	snap, err := state.Snapshot()
	require.Nil(err)

	// Handle before and after indexes as both service and batch
	res := newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 101, res))
	require.Empty(res.drain)
	require.Len(res.migrated, 9)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, false, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)

	res = newJobResult()
	require.Nil(handleTaskGroup(snap, true, job.TaskGroups[0], allocs, 103, res))
	require.Empty(res.drain)
	require.Empty(res.migrated)
	require.True(res.done)
}