github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/core_sched_test.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  	"time"
     7  
     8  	memdb "github.com/hashicorp/go-memdb"
     9  	"github.com/hashicorp/nomad/helper/uuid"
    10  	"github.com/hashicorp/nomad/nomad/mock"
    11  	"github.com/hashicorp/nomad/nomad/structs"
    12  	"github.com/hashicorp/nomad/testutil"
    13  	"github.com/stretchr/testify/assert"
    14  	"github.com/stretchr/testify/require"
    15  )
    16  
    17  func TestCoreScheduler_EvalGC(t *testing.T) {
    18  	t.Parallel()
    19  	s1 := TestServer(t, nil)
    20  	defer s1.Shutdown()
    21  	testutil.WaitForLeader(t, s1.RPC)
    22  	require := require.New(t)
    23  
    24  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
    25  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
    26  
    27  	// Insert "dead" eval
    28  	state := s1.fsm.State()
    29  	eval := mock.Eval()
    30  	eval.Status = structs.EvalStatusFailed
    31  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
    32  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
    33  	require.Nil(err)
    34  
    35  	// Insert mock job with rescheduling disabled
    36  	job := mock.Job()
    37  	job.ID = eval.JobID
    38  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
    39  		Attempts: 0,
    40  		Interval: 0 * time.Second,
    41  	}
    42  	err = state.UpsertJob(1001, job)
    43  	require.Nil(err)
    44  
    45  	// Insert "dead" alloc
    46  	alloc := mock.Alloc()
    47  	alloc.EvalID = eval.ID
    48  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
    49  	alloc.JobID = eval.JobID
    50  	alloc.TaskGroup = job.TaskGroups[0].Name
    51  
    52  	// Insert "lost" alloc
    53  	alloc2 := mock.Alloc()
    54  	alloc2.EvalID = eval.ID
    55  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
    56  	alloc2.ClientStatus = structs.AllocClientStatusLost
    57  	alloc2.JobID = eval.JobID
    58  	alloc2.TaskGroup = job.TaskGroups[0].Name
    59  	err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
    60  	if err != nil {
    61  		t.Fatalf("err: %v", err)
    62  	}
    63  
    64  	// Update the time tables to make this work
    65  	tt := s1.fsm.TimeTable()
    66  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
    67  
    68  	// Create a core scheduler
    69  	snap, err := state.Snapshot()
    70  	if err != nil {
    71  		t.Fatalf("err: %v", err)
    72  	}
    73  	core := NewCoreScheduler(s1, snap)
    74  
    75  	// Attempt the GC
    76  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
    77  	err = core.Process(gc)
    78  	if err != nil {
    79  		t.Fatalf("err: %v", err)
    80  	}
    81  
    82  	// Should be gone
    83  	ws := memdb.NewWatchSet()
    84  	out, err := state.EvalByID(ws, eval.ID)
    85  	if err != nil {
    86  		t.Fatalf("err: %v", err)
    87  	}
    88  	if out != nil {
    89  		t.Fatalf("bad: %v", out)
    90  	}
    91  
    92  	outA, err := state.AllocByID(ws, alloc.ID)
    93  	if err != nil {
    94  		t.Fatalf("err: %v", err)
    95  	}
    96  	if outA != nil {
    97  		t.Fatalf("bad: %v", outA)
    98  	}
    99  
   100  	outA2, err := state.AllocByID(ws, alloc2.ID)
   101  	if err != nil {
   102  		t.Fatalf("err: %v", err)
   103  	}
   104  	if outA2 != nil {
   105  		t.Fatalf("bad: %v", outA2)
   106  	}
   107  }
   108  
   109  // Tests GC behavior on allocations being rescheduled
   110  func TestCoreScheduler_EvalGC_ReshedulingAllocs(t *testing.T) {
   111  	t.Parallel()
   112  	s1 := TestServer(t, nil)
   113  	defer s1.Shutdown()
   114  	testutil.WaitForLeader(t, s1.RPC)
   115  	require := require.New(t)
   116  
   117  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   118  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   119  
   120  	// Insert "dead" eval
   121  	state := s1.fsm.State()
   122  	eval := mock.Eval()
   123  	eval.Status = structs.EvalStatusFailed
   124  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   125  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   126  	require.Nil(err)
   127  
   128  	// Insert "pending" eval for same job
   129  	eval2 := mock.Eval()
   130  	eval2.JobID = eval.JobID
   131  	state.UpsertJobSummary(999, mock.JobSummary(eval2.JobID))
   132  	err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
   133  	require.Nil(err)
   134  
   135  	// Insert mock job with default reschedule policy of 2 in 10 minutes
   136  	job := mock.Job()
   137  	job.ID = eval.JobID
   138  
   139  	err = state.UpsertJob(1001, job)
   140  	require.Nil(err)
   141  
   142  	// Insert failed alloc with an old reschedule attempt, can be GCed
   143  	alloc := mock.Alloc()
   144  	alloc.EvalID = eval.ID
   145  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
   146  	alloc.ClientStatus = structs.AllocClientStatusFailed
   147  	alloc.JobID = eval.JobID
   148  	alloc.TaskGroup = job.TaskGroups[0].Name
   149  	alloc.RescheduleTracker = &structs.RescheduleTracker{
   150  		Events: []*structs.RescheduleEvent{
   151  			{
   152  				RescheduleTime: time.Now().Add(-1 * time.Hour).UTC().UnixNano(),
   153  				PrevNodeID:     uuid.Generate(),
   154  				PrevAllocID:    uuid.Generate(),
   155  			},
   156  		},
   157  	}
   158  
   159  	// Insert another failed alloc with a recent reschedule attempt, can't be GCed
   160  	alloc2 := mock.Alloc()
   161  	alloc2.EvalID = eval.ID
   162  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   163  	alloc2.ClientStatus = structs.AllocClientStatusLost
   164  	alloc2.JobID = eval.JobID
   165  	alloc2.TaskGroup = job.TaskGroups[0].Name
   166  	alloc2.RescheduleTracker = &structs.RescheduleTracker{
   167  		Events: []*structs.RescheduleEvent{
   168  			{
   169  				RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
   170  				PrevNodeID:     uuid.Generate(),
   171  				PrevAllocID:    uuid.Generate(),
   172  			},
   173  		},
   174  	}
   175  	err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
   176  	require.Nil(err)
   177  
   178  	// Update the time tables to make this work
   179  	tt := s1.fsm.TimeTable()
   180  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   181  
   182  	// Create a core scheduler
   183  	snap, err := state.Snapshot()
   184  	if err != nil {
   185  		t.Fatalf("err: %v", err)
   186  	}
   187  	core := NewCoreScheduler(s1, snap)
   188  
   189  	// Attempt the GC, job has all terminal allocs and one pending eval
   190  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   191  	err = core.Process(gc)
   192  	require.Nil(err)
   193  
   194  	// Eval should still exist
   195  	ws := memdb.NewWatchSet()
   196  	out, err := state.EvalByID(ws, eval.ID)
   197  	require.Nil(err)
   198  	require.Equal(eval.ID, out.ID)
   199  
   200  	outA, err := state.AllocByID(ws, alloc.ID)
   201  	require.Nil(err)
   202  	require.Nil(outA)
   203  
   204  	outA2, err := state.AllocByID(ws, alloc2.ID)
   205  	require.Nil(err)
   206  	require.Equal(alloc2.ID, outA2.ID)
   207  
   208  }
   209  
   210  // Tests GC behavior on stopped job with reschedulable allocs
   211  func TestCoreScheduler_EvalGC_StoppedJob_Reschedulable(t *testing.T) {
   212  	t.Parallel()
   213  	s1 := TestServer(t, nil)
   214  	defer s1.Shutdown()
   215  	testutil.WaitForLeader(t, s1.RPC)
   216  	require := require.New(t)
   217  
   218  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   219  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   220  
   221  	// Insert "dead" eval
   222  	state := s1.fsm.State()
   223  	eval := mock.Eval()
   224  	eval.Status = structs.EvalStatusFailed
   225  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   226  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   227  	require.Nil(err)
   228  
   229  	// Insert mock stopped job with default reschedule policy of 2 in 10 minutes
   230  	job := mock.Job()
   231  	job.ID = eval.JobID
   232  	job.Stop = true
   233  
   234  	err = state.UpsertJob(1001, job)
   235  	require.Nil(err)
   236  
   237  	// Insert failed alloc with a recent reschedule attempt
   238  	alloc := mock.Alloc()
   239  	alloc.EvalID = eval.ID
   240  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
   241  	alloc.ClientStatus = structs.AllocClientStatusLost
   242  	alloc.JobID = eval.JobID
   243  	alloc.TaskGroup = job.TaskGroups[0].Name
   244  	alloc.RescheduleTracker = &structs.RescheduleTracker{
   245  		Events: []*structs.RescheduleEvent{
   246  			{
   247  				RescheduleTime: time.Now().Add(-3 * time.Minute).UTC().UnixNano(),
   248  				PrevNodeID:     uuid.Generate(),
   249  				PrevAllocID:    uuid.Generate(),
   250  			},
   251  		},
   252  	}
   253  	err = state.UpsertAllocs(1001, []*structs.Allocation{alloc})
   254  	require.Nil(err)
   255  
   256  	// Update the time tables to make this work
   257  	tt := s1.fsm.TimeTable()
   258  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   259  
   260  	// Create a core scheduler
   261  	snap, err := state.Snapshot()
   262  	if err != nil {
   263  		t.Fatalf("err: %v", err)
   264  	}
   265  	core := NewCoreScheduler(s1, snap)
   266  
   267  	// Attempt the GC
   268  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   269  	err = core.Process(gc)
   270  	require.Nil(err)
   271  
   272  	// Eval should not exist
   273  	ws := memdb.NewWatchSet()
   274  	out, err := state.EvalByID(ws, eval.ID)
   275  	require.Nil(err)
   276  	require.Nil(out)
   277  
   278  	// Alloc should not exist
   279  	outA, err := state.AllocByID(ws, alloc.ID)
   280  	require.Nil(err)
   281  	require.Nil(outA)
   282  
   283  }
   284  
   285  // An EvalGC should never reap a batch job that has not been stopped
   286  func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
   287  	t.Parallel()
   288  	s1 := TestServer(t, nil)
   289  	defer s1.Shutdown()
   290  	testutil.WaitForLeader(t, s1.RPC)
   291  
   292  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   293  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   294  
   295  	// Insert a "dead" job
   296  	state := s1.fsm.State()
   297  	job := mock.Job()
   298  	job.Type = structs.JobTypeBatch
   299  	job.Status = structs.JobStatusDead
   300  	err := state.UpsertJob(1000, job)
   301  	if err != nil {
   302  		t.Fatalf("err: %v", err)
   303  	}
   304  
   305  	// Insert "complete" eval
   306  	eval := mock.Eval()
   307  	eval.Status = structs.EvalStatusComplete
   308  	eval.Type = structs.JobTypeBatch
   309  	eval.JobID = job.ID
   310  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
   311  	if err != nil {
   312  		t.Fatalf("err: %v", err)
   313  	}
   314  
   315  	// Insert "failed" alloc
   316  	alloc := mock.Alloc()
   317  	alloc.JobID = job.ID
   318  	alloc.EvalID = eval.ID
   319  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   320  
   321  	// Insert "lost" alloc
   322  	alloc2 := mock.Alloc()
   323  	alloc2.JobID = job.ID
   324  	alloc2.EvalID = eval.ID
   325  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   326  	alloc2.ClientStatus = structs.AllocClientStatusLost
   327  
   328  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   329  	if err != nil {
   330  		t.Fatalf("err: %v", err)
   331  	}
   332  
   333  	// Update the time tables to make this work
   334  	tt := s1.fsm.TimeTable()
   335  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   336  
   337  	// Create a core scheduler
   338  	snap, err := state.Snapshot()
   339  	if err != nil {
   340  		t.Fatalf("err: %v", err)
   341  	}
   342  	core := NewCoreScheduler(s1, snap)
   343  
   344  	// Attempt the GC
   345  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   346  	err = core.Process(gc)
   347  	if err != nil {
   348  		t.Fatalf("err: %v", err)
   349  	}
   350  
   351  	// Nothing should be gone
   352  	ws := memdb.NewWatchSet()
   353  	out, err := state.EvalByID(ws, eval.ID)
   354  	if err != nil {
   355  		t.Fatalf("err: %v", err)
   356  	}
   357  	if out == nil {
   358  		t.Fatalf("bad: %v", out)
   359  	}
   360  
   361  	outA, err := state.AllocByID(ws, alloc.ID)
   362  	if err != nil {
   363  		t.Fatalf("err: %v", err)
   364  	}
   365  	if outA == nil {
   366  		t.Fatalf("bad: %v", outA)
   367  	}
   368  
   369  	outA2, err := state.AllocByID(ws, alloc2.ID)
   370  	if err != nil {
   371  		t.Fatalf("err: %v", err)
   372  	}
   373  	if outA2 == nil {
   374  		t.Fatalf("bad: %v", outA2)
   375  	}
   376  
   377  	outB, err := state.JobByID(ws, job.Namespace, job.ID)
   378  	if err != nil {
   379  		t.Fatalf("err: %v", err)
   380  	}
   381  	if outB == nil {
   382  		t.Fatalf("bad: %v", outB)
   383  	}
   384  }
   385  
   386  // An EvalGC should  reap a batch job that has been stopped
   387  func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) {
   388  	t.Parallel()
   389  	s1 := TestServer(t, nil)
   390  	defer s1.Shutdown()
   391  	testutil.WaitForLeader(t, s1.RPC)
   392  
   393  	require := require.New(t)
   394  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   395  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   396  
   397  	// Create a "dead" job
   398  	state := s1.fsm.State()
   399  	job := mock.Job()
   400  	job.Type = structs.JobTypeBatch
   401  	job.Status = structs.JobStatusDead
   402  	job.Stop = true
   403  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
   404  		Attempts: 0,
   405  		Interval: 0 * time.Second,
   406  	}
   407  	err := state.UpsertJob(1001, job)
   408  	require.Nil(err)
   409  
   410  	// Insert "complete" eval
   411  	eval := mock.Eval()
   412  	eval.Status = structs.EvalStatusComplete
   413  	eval.Type = structs.JobTypeBatch
   414  	eval.JobID = job.ID
   415  	err = state.UpsertEvals(1002, []*structs.Evaluation{eval})
   416  	require.Nil(err)
   417  
   418  	// Insert "failed" alloc
   419  	alloc := mock.Alloc()
   420  	alloc.JobID = job.ID
   421  	alloc.EvalID = eval.ID
   422  	alloc.TaskGroup = job.TaskGroups[0].Name
   423  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   424  
   425  	// Insert "lost" alloc
   426  	alloc2 := mock.Alloc()
   427  	alloc2.JobID = job.ID
   428  	alloc2.EvalID = eval.ID
   429  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   430  	alloc2.ClientStatus = structs.AllocClientStatusLost
   431  	alloc2.TaskGroup = job.TaskGroups[0].Name
   432  
   433  	err = state.UpsertAllocs(1003, []*structs.Allocation{alloc, alloc2})
   434  	if err != nil {
   435  		t.Fatalf("err: %v", err)
   436  	}
   437  
   438  	// Update the time tables to make this work
   439  	tt := s1.fsm.TimeTable()
   440  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   441  
   442  	// Create a core scheduler
   443  	snap, err := state.Snapshot()
   444  	if err != nil {
   445  		t.Fatalf("err: %v", err)
   446  	}
   447  	core := NewCoreScheduler(s1, snap)
   448  
   449  	// Attempt the GC
   450  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   451  	err = core.Process(gc)
   452  	if err != nil {
   453  		t.Fatalf("err: %v", err)
   454  	}
   455  
   456  	// Everything should be gone
   457  	ws := memdb.NewWatchSet()
   458  	out, err := state.EvalByID(ws, eval.ID)
   459  	if err != nil {
   460  		t.Fatalf("err: %v", err)
   461  	}
   462  	if out != nil {
   463  		t.Fatalf("bad: %v", out)
   464  	}
   465  
   466  	outA, err := state.AllocByID(ws, alloc.ID)
   467  	if err != nil {
   468  		t.Fatalf("err: %v", err)
   469  	}
   470  	if outA != nil {
   471  		t.Fatalf("bad: %v", outA)
   472  	}
   473  
   474  	outA2, err := state.AllocByID(ws, alloc2.ID)
   475  	if err != nil {
   476  		t.Fatalf("err: %v", err)
   477  	}
   478  	if outA2 != nil {
   479  		t.Fatalf("bad: %v", outA2)
   480  	}
   481  }
   482  
   483  func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
   484  	t.Parallel()
   485  	s1 := TestServer(t, nil)
   486  	defer s1.Shutdown()
   487  	testutil.WaitForLeader(t, s1.RPC)
   488  	require := require.New(t)
   489  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   490  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   491  
   492  	// Insert "dead" eval
   493  	state := s1.fsm.State()
   494  	eval := mock.Eval()
   495  	eval.Status = structs.EvalStatusComplete
   496  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   497  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   498  	if err != nil {
   499  		t.Fatalf("err: %v", err)
   500  	}
   501  
   502  	// Create mock job with id same as eval
   503  	job := mock.Job()
   504  	job.ID = eval.JobID
   505  
   506  	// Insert "dead" alloc
   507  	alloc := mock.Alloc()
   508  	alloc.JobID = job.ID
   509  	alloc.EvalID = eval.ID
   510  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   511  	alloc.TaskGroup = job.TaskGroups[0].Name
   512  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   513  
   514  	// Insert "lost" alloc
   515  	alloc2 := mock.Alloc()
   516  	alloc2.JobID = job.ID
   517  	alloc2.EvalID = eval.ID
   518  	alloc2.TaskGroup = job.TaskGroups[0].Name
   519  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   520  	alloc2.ClientStatus = structs.AllocClientStatusLost
   521  
   522  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   523  	if err != nil {
   524  		t.Fatalf("err: %v", err)
   525  	}
   526  
   527  	// Insert "running" alloc
   528  	alloc3 := mock.Alloc()
   529  	alloc3.EvalID = eval.ID
   530  	alloc3.JobID = job.ID
   531  	state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID))
   532  	err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3})
   533  	if err != nil {
   534  		t.Fatalf("err: %v", err)
   535  	}
   536  
   537  	// Insert mock job with rescheduling disabled
   538  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
   539  		Attempts: 0,
   540  		Interval: 0 * time.Second,
   541  	}
   542  	err = state.UpsertJob(1001, job)
   543  	require.Nil(err)
   544  
   545  	// Update the time tables to make this work
   546  	tt := s1.fsm.TimeTable()
   547  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   548  
   549  	// Create a core scheduler
   550  	snap, err := state.Snapshot()
   551  	if err != nil {
   552  		t.Fatalf("err: %v", err)
   553  	}
   554  	core := NewCoreScheduler(s1, snap)
   555  
   556  	// Attempt the GC
   557  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   558  	err = core.Process(gc)
   559  	if err != nil {
   560  		t.Fatalf("err: %v", err)
   561  	}
   562  
   563  	// Should not be gone
   564  	ws := memdb.NewWatchSet()
   565  	out, err := state.EvalByID(ws, eval.ID)
   566  	if err != nil {
   567  		t.Fatalf("err: %v", err)
   568  	}
   569  	if out == nil {
   570  		t.Fatalf("bad: %v", out)
   571  	}
   572  
   573  	outA, err := state.AllocByID(ws, alloc3.ID)
   574  	if err != nil {
   575  		t.Fatalf("err: %v", err)
   576  	}
   577  	if outA == nil {
   578  		t.Fatalf("bad: %v", outA)
   579  	}
   580  
   581  	// Should be gone
   582  	outB, err := state.AllocByID(ws, alloc.ID)
   583  	if err != nil {
   584  		t.Fatalf("err: %v", err)
   585  	}
   586  	if outB != nil {
   587  		t.Fatalf("bad: %v", outB)
   588  	}
   589  
   590  	outC, err := state.AllocByID(ws, alloc2.ID)
   591  	if err != nil {
   592  		t.Fatalf("err: %v", err)
   593  	}
   594  	if outC != nil {
   595  		t.Fatalf("bad: %v", outC)
   596  	}
   597  }
   598  
   599  func TestCoreScheduler_EvalGC_Force(t *testing.T) {
   600  	t.Parallel()
   601  	for _, withAcl := range []bool{false, true} {
   602  		t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
   603  			require := require.New(t)
   604  			var server *Server
   605  			if withAcl {
   606  				server, _ = TestACLServer(t, nil)
   607  			} else {
   608  				server = TestServer(t, nil)
   609  			}
   610  			defer server.Shutdown()
   611  			testutil.WaitForLeader(t, server.RPC)
   612  
   613  			// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   614  			server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   615  
   616  			// Insert "dead" eval
   617  			state := server.fsm.State()
   618  			eval := mock.Eval()
   619  			eval.Status = structs.EvalStatusFailed
   620  			state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   621  			err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   622  			if err != nil {
   623  				t.Fatalf("err: %v", err)
   624  			}
   625  
   626  			// Insert mock job with rescheduling disabled
   627  			job := mock.Job()
   628  			job.ID = eval.JobID
   629  			job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
   630  				Attempts: 0,
   631  				Interval: 0 * time.Second,
   632  			}
   633  			err = state.UpsertJob(1001, job)
   634  			require.Nil(err)
   635  
   636  			// Insert "dead" alloc
   637  			alloc := mock.Alloc()
   638  			alloc.EvalID = eval.ID
   639  			alloc.DesiredStatus = structs.AllocDesiredStatusStop
   640  			alloc.TaskGroup = job.TaskGroups[0].Name
   641  			state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   642  			err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
   643  			if err != nil {
   644  				t.Fatalf("err: %v", err)
   645  			}
   646  
   647  			// Create a core scheduler
   648  			snap, err := state.Snapshot()
   649  			if err != nil {
   650  				t.Fatalf("err: %v", err)
   651  			}
   652  			core := NewCoreScheduler(server, snap)
   653  
   654  			// Attempt the GC
   655  			gc := server.coreJobEval(structs.CoreJobForceGC, 1002)
   656  			err = core.Process(gc)
   657  			if err != nil {
   658  				t.Fatalf("err: %v", err)
   659  			}
   660  
   661  			// Should be gone
   662  			ws := memdb.NewWatchSet()
   663  			out, err := state.EvalByID(ws, eval.ID)
   664  			if err != nil {
   665  				t.Fatalf("err: %v", err)
   666  			}
   667  			if out != nil {
   668  				t.Fatalf("bad: %v", out)
   669  			}
   670  
   671  			outA, err := state.AllocByID(ws, alloc.ID)
   672  			if err != nil {
   673  				t.Fatalf("err: %v", err)
   674  			}
   675  			if outA != nil {
   676  				t.Fatalf("bad: %v", outA)
   677  			}
   678  		})
   679  	}
   680  }
   681  
   682  func TestCoreScheduler_NodeGC(t *testing.T) {
   683  	t.Parallel()
   684  	for _, withAcl := range []bool{false, true} {
   685  		t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
   686  			var server *Server
   687  			if withAcl {
   688  				server, _ = TestACLServer(t, nil)
   689  			} else {
   690  				server = TestServer(t, nil)
   691  			}
   692  			defer server.Shutdown()
   693  			testutil.WaitForLeader(t, server.RPC)
   694  
   695  			// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   696  			server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   697  
   698  			// Insert "dead" node
   699  			state := server.fsm.State()
   700  			node := mock.Node()
   701  			node.Status = structs.NodeStatusDown
   702  			err := state.UpsertNode(1000, node)
   703  			if err != nil {
   704  				t.Fatalf("err: %v", err)
   705  			}
   706  
   707  			// Update the time tables to make this work
   708  			tt := server.fsm.TimeTable()
   709  			tt.Witness(2000, time.Now().UTC().Add(-1*server.config.NodeGCThreshold))
   710  
   711  			// Create a core scheduler
   712  			snap, err := state.Snapshot()
   713  			if err != nil {
   714  				t.Fatalf("err: %v", err)
   715  			}
   716  			core := NewCoreScheduler(server, snap)
   717  
   718  			// Attempt the GC
   719  			gc := server.coreJobEval(structs.CoreJobNodeGC, 2000)
   720  			err = core.Process(gc)
   721  			if err != nil {
   722  				t.Fatalf("err: %v", err)
   723  			}
   724  
   725  			// Should be gone
   726  			ws := memdb.NewWatchSet()
   727  			out, err := state.NodeByID(ws, node.ID)
   728  			if err != nil {
   729  				t.Fatalf("err: %v", err)
   730  			}
   731  			if out != nil {
   732  				t.Fatalf("bad: %v", out)
   733  			}
   734  		})
   735  	}
   736  }
   737  
   738  func TestCoreScheduler_NodeGC_TerminalAllocs(t *testing.T) {
   739  	t.Parallel()
   740  	s1 := TestServer(t, nil)
   741  	defer s1.Shutdown()
   742  	testutil.WaitForLeader(t, s1.RPC)
   743  
   744  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   745  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   746  
   747  	// Insert "dead" node
   748  	state := s1.fsm.State()
   749  	node := mock.Node()
   750  	node.Status = structs.NodeStatusDown
   751  	err := state.UpsertNode(1000, node)
   752  	if err != nil {
   753  		t.Fatalf("err: %v", err)
   754  	}
   755  
   756  	// Insert a terminal alloc on that node
   757  	alloc := mock.Alloc()
   758  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   759  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   760  	if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
   761  		t.Fatalf("err: %v", err)
   762  	}
   763  
   764  	// Update the time tables to make this work
   765  	tt := s1.fsm.TimeTable()
   766  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
   767  
   768  	// Create a core scheduler
   769  	snap, err := state.Snapshot()
   770  	if err != nil {
   771  		t.Fatalf("err: %v", err)
   772  	}
   773  	core := NewCoreScheduler(s1, snap)
   774  
   775  	// Attempt the GC
   776  	gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
   777  	err = core.Process(gc)
   778  	if err != nil {
   779  		t.Fatalf("err: %v", err)
   780  	}
   781  
   782  	// Should be gone
   783  	ws := memdb.NewWatchSet()
   784  	out, err := state.NodeByID(ws, node.ID)
   785  	if err != nil {
   786  		t.Fatalf("err: %v", err)
   787  	}
   788  	if out != nil {
   789  		t.Fatalf("bad: %v", out)
   790  	}
   791  }
   792  
   793  func TestCoreScheduler_NodeGC_RunningAllocs(t *testing.T) {
   794  	t.Parallel()
   795  	s1 := TestServer(t, nil)
   796  	defer s1.Shutdown()
   797  	testutil.WaitForLeader(t, s1.RPC)
   798  
   799  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   800  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   801  
   802  	// Insert "dead" node
   803  	state := s1.fsm.State()
   804  	node := mock.Node()
   805  	node.Status = structs.NodeStatusDown
   806  	err := state.UpsertNode(1000, node)
   807  	if err != nil {
   808  		t.Fatalf("err: %v", err)
   809  	}
   810  
   811  	// Insert a running alloc on that node
   812  	alloc := mock.Alloc()
   813  	alloc.NodeID = node.ID
   814  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
   815  	alloc.ClientStatus = structs.AllocClientStatusRunning
   816  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   817  	if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
   818  		t.Fatalf("err: %v", err)
   819  	}
   820  
   821  	// Update the time tables to make this work
   822  	tt := s1.fsm.TimeTable()
   823  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
   824  
   825  	// Create a core scheduler
   826  	snap, err := state.Snapshot()
   827  	if err != nil {
   828  		t.Fatalf("err: %v", err)
   829  	}
   830  	core := NewCoreScheduler(s1, snap)
   831  
   832  	// Attempt the GC
   833  	gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
   834  	err = core.Process(gc)
   835  	if err != nil {
   836  		t.Fatalf("err: %v", err)
   837  	}
   838  
   839  	// Should still be here
   840  	ws := memdb.NewWatchSet()
   841  	out, err := state.NodeByID(ws, node.ID)
   842  	if err != nil {
   843  		t.Fatalf("err: %v", err)
   844  	}
   845  	if out == nil {
   846  		t.Fatalf("bad: %v", out)
   847  	}
   848  }
   849  
   850  func TestCoreScheduler_NodeGC_Force(t *testing.T) {
   851  	t.Parallel()
   852  	s1 := TestServer(t, nil)
   853  	defer s1.Shutdown()
   854  	testutil.WaitForLeader(t, s1.RPC)
   855  
   856  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   857  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   858  
   859  	// Insert "dead" node
   860  	state := s1.fsm.State()
   861  	node := mock.Node()
   862  	node.Status = structs.NodeStatusDown
   863  	err := state.UpsertNode(1000, node)
   864  	if err != nil {
   865  		t.Fatalf("err: %v", err)
   866  	}
   867  
   868  	// Create a core scheduler
   869  	snap, err := state.Snapshot()
   870  	if err != nil {
   871  		t.Fatalf("err: %v", err)
   872  	}
   873  	core := NewCoreScheduler(s1, snap)
   874  
   875  	// Attempt the GC
   876  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1000)
   877  	err = core.Process(gc)
   878  	if err != nil {
   879  		t.Fatalf("err: %v", err)
   880  	}
   881  
   882  	// Should be gone
   883  	ws := memdb.NewWatchSet()
   884  	out, err := state.NodeByID(ws, node.ID)
   885  	if err != nil {
   886  		t.Fatalf("err: %v", err)
   887  	}
   888  	if out != nil {
   889  		t.Fatalf("bad: %v", out)
   890  	}
   891  }
   892  
   893  func TestCoreScheduler_JobGC_OutstandingEvals(t *testing.T) {
   894  	t.Parallel()
   895  	s1 := TestServer(t, nil)
   896  	defer s1.Shutdown()
   897  	testutil.WaitForLeader(t, s1.RPC)
   898  
   899  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   900  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   901  
   902  	// Insert job.
   903  	state := s1.fsm.State()
   904  	job := mock.Job()
   905  	job.Type = structs.JobTypeBatch
   906  	job.Status = structs.JobStatusDead
   907  	err := state.UpsertJob(1000, job)
   908  	if err != nil {
   909  		t.Fatalf("err: %v", err)
   910  	}
   911  
   912  	// Insert two evals, one terminal and one not
   913  	eval := mock.Eval()
   914  	eval.JobID = job.ID
   915  	eval.Status = structs.EvalStatusComplete
   916  
   917  	eval2 := mock.Eval()
   918  	eval2.JobID = job.ID
   919  	eval2.Status = structs.EvalStatusPending
   920  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
   921  	if err != nil {
   922  		t.Fatalf("err: %v", err)
   923  	}
   924  
   925  	// Update the time tables to make this work
   926  	tt := s1.fsm.TimeTable()
   927  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
   928  
   929  	// Create a core scheduler
   930  	snap, err := state.Snapshot()
   931  	if err != nil {
   932  		t.Fatalf("err: %v", err)
   933  	}
   934  	core := NewCoreScheduler(s1, snap)
   935  
   936  	// Attempt the GC
   937  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
   938  	err = core.Process(gc)
   939  	if err != nil {
   940  		t.Fatalf("err: %v", err)
   941  	}
   942  
   943  	// Should still exist
   944  	ws := memdb.NewWatchSet()
   945  	out, err := state.JobByID(ws, job.Namespace, job.ID)
   946  	if err != nil {
   947  		t.Fatalf("err: %v", err)
   948  	}
   949  	if out == nil {
   950  		t.Fatalf("bad: %v", out)
   951  	}
   952  
   953  	outE, err := state.EvalByID(ws, eval.ID)
   954  	if err != nil {
   955  		t.Fatalf("err: %v", err)
   956  	}
   957  	if outE == nil {
   958  		t.Fatalf("bad: %v", outE)
   959  	}
   960  
   961  	outE2, err := state.EvalByID(ws, eval2.ID)
   962  	if err != nil {
   963  		t.Fatalf("err: %v", err)
   964  	}
   965  	if outE2 == nil {
   966  		t.Fatalf("bad: %v", outE2)
   967  	}
   968  
   969  	// Update the second eval to be terminal
   970  	eval2.Status = structs.EvalStatusComplete
   971  	err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
   972  	if err != nil {
   973  		t.Fatalf("err: %v", err)
   974  	}
   975  
   976  	// Create a core scheduler
   977  	snap, err = state.Snapshot()
   978  	if err != nil {
   979  		t.Fatalf("err: %v", err)
   980  	}
   981  	core = NewCoreScheduler(s1, snap)
   982  
   983  	// Attempt the GC
   984  	gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
   985  	err = core.Process(gc)
   986  	if err != nil {
   987  		t.Fatalf("err: %v", err)
   988  	}
   989  
   990  	// Should not still exist
   991  	out, err = state.JobByID(ws, job.Namespace, job.ID)
   992  	if err != nil {
   993  		t.Fatalf("err: %v", err)
   994  	}
   995  	if out != nil {
   996  		t.Fatalf("bad: %v", out)
   997  	}
   998  
   999  	outE, err = state.EvalByID(ws, eval.ID)
  1000  	if err != nil {
  1001  		t.Fatalf("err: %v", err)
  1002  	}
  1003  	if outE != nil {
  1004  		t.Fatalf("bad: %v", outE)
  1005  	}
  1006  
  1007  	outE2, err = state.EvalByID(ws, eval2.ID)
  1008  	if err != nil {
  1009  		t.Fatalf("err: %v", err)
  1010  	}
  1011  	if outE2 != nil {
  1012  		t.Fatalf("bad: %v", outE2)
  1013  	}
  1014  }
  1015  
  1016  func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) {
  1017  	t.Parallel()
  1018  	s1 := TestServer(t, nil)
  1019  	defer s1.Shutdown()
  1020  	testutil.WaitForLeader(t, s1.RPC)
  1021  
  1022  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1023  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1024  
  1025  	// Insert job.
  1026  	state := s1.fsm.State()
  1027  	job := mock.Job()
  1028  	job.Type = structs.JobTypeBatch
  1029  	job.Status = structs.JobStatusDead
  1030  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1031  		Attempts: 0,
  1032  		Interval: 0 * time.Second,
  1033  	}
  1034  	err := state.UpsertJob(1000, job)
  1035  	if err != nil {
  1036  		t.Fatalf("err: %v", err)
  1037  	}
  1038  
  1039  	// Insert an eval
  1040  	eval := mock.Eval()
  1041  	eval.JobID = job.ID
  1042  	eval.Status = structs.EvalStatusComplete
  1043  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
  1044  	if err != nil {
  1045  		t.Fatalf("err: %v", err)
  1046  	}
  1047  
  1048  	// Insert two allocs, one terminal and one not
  1049  	alloc := mock.Alloc()
  1050  	alloc.JobID = job.ID
  1051  	alloc.EvalID = eval.ID
  1052  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
  1053  	alloc.ClientStatus = structs.AllocClientStatusComplete
  1054  	alloc.TaskGroup = job.TaskGroups[0].Name
  1055  
  1056  	alloc2 := mock.Alloc()
  1057  	alloc2.JobID = job.ID
  1058  	alloc2.EvalID = eval.ID
  1059  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
  1060  	alloc2.ClientStatus = structs.AllocClientStatusRunning
  1061  	alloc2.TaskGroup = job.TaskGroups[0].Name
  1062  
  1063  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
  1064  	if err != nil {
  1065  		t.Fatalf("err: %v", err)
  1066  	}
  1067  
  1068  	// Update the time tables to make this work
  1069  	tt := s1.fsm.TimeTable()
  1070  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
  1071  
  1072  	// Create a core scheduler
  1073  	snap, err := state.Snapshot()
  1074  	if err != nil {
  1075  		t.Fatalf("err: %v", err)
  1076  	}
  1077  	core := NewCoreScheduler(s1, snap)
  1078  
  1079  	// Attempt the GC
  1080  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
  1081  	err = core.Process(gc)
  1082  	if err != nil {
  1083  		t.Fatalf("err: %v", err)
  1084  	}
  1085  
  1086  	// Should still exist
  1087  	ws := memdb.NewWatchSet()
  1088  	out, err := state.JobByID(ws, job.Namespace, job.ID)
  1089  	if err != nil {
  1090  		t.Fatalf("err: %v", err)
  1091  	}
  1092  	if out == nil {
  1093  		t.Fatalf("bad: %v", out)
  1094  	}
  1095  
  1096  	outA, err := state.AllocByID(ws, alloc.ID)
  1097  	if err != nil {
  1098  		t.Fatalf("err: %v", err)
  1099  	}
  1100  	if outA == nil {
  1101  		t.Fatalf("bad: %v", outA)
  1102  	}
  1103  
  1104  	outA2, err := state.AllocByID(ws, alloc2.ID)
  1105  	if err != nil {
  1106  		t.Fatalf("err: %v", err)
  1107  	}
  1108  	if outA2 == nil {
  1109  		t.Fatalf("bad: %v", outA2)
  1110  	}
  1111  
  1112  	// Update the second alloc to be terminal
  1113  	alloc2.ClientStatus = structs.AllocClientStatusComplete
  1114  	err = state.UpsertAllocs(1003, []*structs.Allocation{alloc2})
  1115  	if err != nil {
  1116  		t.Fatalf("err: %v", err)
  1117  	}
  1118  
  1119  	// Create a core scheduler
  1120  	snap, err = state.Snapshot()
  1121  	if err != nil {
  1122  		t.Fatalf("err: %v", err)
  1123  	}
  1124  	core = NewCoreScheduler(s1, snap)
  1125  
  1126  	// Attempt the GC
  1127  	gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
  1128  	err = core.Process(gc)
  1129  	if err != nil {
  1130  		t.Fatalf("err: %v", err)
  1131  	}
  1132  
  1133  	// Should not still exist
  1134  	out, err = state.JobByID(ws, job.Namespace, job.ID)
  1135  	if err != nil {
  1136  		t.Fatalf("err: %v", err)
  1137  	}
  1138  	if out != nil {
  1139  		t.Fatalf("bad: %v", out)
  1140  	}
  1141  
  1142  	outA, err = state.AllocByID(ws, alloc.ID)
  1143  	if err != nil {
  1144  		t.Fatalf("err: %v", err)
  1145  	}
  1146  	if outA != nil {
  1147  		t.Fatalf("bad: %v", outA)
  1148  	}
  1149  
  1150  	outA2, err = state.AllocByID(ws, alloc2.ID)
  1151  	if err != nil {
  1152  		t.Fatalf("err: %v", err)
  1153  	}
  1154  	if outA2 != nil {
  1155  		t.Fatalf("bad: %v", outA2)
  1156  	}
  1157  }
  1158  
  1159  // This test ensures that batch jobs are GC'd in one shot, meaning it all
  1160  // allocs/evals and job or nothing
  1161  func TestCoreScheduler_JobGC_OneShot(t *testing.T) {
  1162  	t.Parallel()
  1163  	s1 := TestServer(t, nil)
  1164  	defer s1.Shutdown()
  1165  	testutil.WaitForLeader(t, s1.RPC)
  1166  
  1167  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1168  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1169  
  1170  	// Insert job.
  1171  	state := s1.fsm.State()
  1172  	job := mock.Job()
  1173  	job.Type = structs.JobTypeBatch
  1174  	err := state.UpsertJob(1000, job)
  1175  	if err != nil {
  1176  		t.Fatalf("err: %v", err)
  1177  	}
  1178  
  1179  	// Insert two complete evals
  1180  	eval := mock.Eval()
  1181  	eval.JobID = job.ID
  1182  	eval.Status = structs.EvalStatusComplete
  1183  
  1184  	eval2 := mock.Eval()
  1185  	eval2.JobID = job.ID
  1186  	eval2.Status = structs.EvalStatusComplete
  1187  
  1188  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
  1189  	if err != nil {
  1190  		t.Fatalf("err: %v", err)
  1191  	}
  1192  
  1193  	// Insert one complete alloc and one running on distinct evals
  1194  	alloc := mock.Alloc()
  1195  	alloc.JobID = job.ID
  1196  	alloc.EvalID = eval.ID
  1197  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
  1198  
  1199  	alloc2 := mock.Alloc()
  1200  	alloc2.JobID = job.ID
  1201  	alloc2.EvalID = eval2.ID
  1202  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
  1203  
  1204  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
  1205  	if err != nil {
  1206  		t.Fatalf("err: %v", err)
  1207  	}
  1208  
  1209  	// Force the jobs state to dead
  1210  	job.Status = structs.JobStatusDead
  1211  
  1212  	// Update the time tables to make this work
  1213  	tt := s1.fsm.TimeTable()
  1214  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
  1215  
  1216  	// Create a core scheduler
  1217  	snap, err := state.Snapshot()
  1218  	if err != nil {
  1219  		t.Fatalf("err: %v", err)
  1220  	}
  1221  	core := NewCoreScheduler(s1, snap)
  1222  
  1223  	// Attempt the GC
  1224  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
  1225  	err = core.Process(gc)
  1226  	if err != nil {
  1227  		t.Fatalf("err: %v", err)
  1228  	}
  1229  
  1230  	// Should still exist
  1231  	ws := memdb.NewWatchSet()
  1232  	out, err := state.JobByID(ws, job.Namespace, job.ID)
  1233  	if err != nil {
  1234  		t.Fatalf("err: %v", err)
  1235  	}
  1236  	if out == nil {
  1237  		t.Fatalf("bad: %v", out)
  1238  	}
  1239  
  1240  	outE, err := state.EvalByID(ws, eval.ID)
  1241  	if err != nil {
  1242  		t.Fatalf("err: %v", err)
  1243  	}
  1244  	if outE == nil {
  1245  		t.Fatalf("bad: %v", outE)
  1246  	}
  1247  
  1248  	outE2, err := state.EvalByID(ws, eval2.ID)
  1249  	if err != nil {
  1250  		t.Fatalf("err: %v", err)
  1251  	}
  1252  	if outE2 == nil {
  1253  		t.Fatalf("bad: %v", outE2)
  1254  	}
  1255  
  1256  	outA, err := state.AllocByID(ws, alloc.ID)
  1257  	if err != nil {
  1258  		t.Fatalf("err: %v", err)
  1259  	}
  1260  	if outA == nil {
  1261  		t.Fatalf("bad: %v", outA)
  1262  	}
  1263  	outA2, err := state.AllocByID(ws, alloc2.ID)
  1264  	if err != nil {
  1265  		t.Fatalf("err: %v", err)
  1266  	}
  1267  	if outA2 == nil {
  1268  		t.Fatalf("bad: %v", outA2)
  1269  	}
  1270  }
  1271  
  1272  // This test ensures that stopped jobs are GCd
  1273  func TestCoreScheduler_JobGC_Stopped(t *testing.T) {
  1274  	t.Parallel()
  1275  	s1 := TestServer(t, nil)
  1276  	defer s1.Shutdown()
  1277  	testutil.WaitForLeader(t, s1.RPC)
  1278  
  1279  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1280  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1281  
  1282  	// Insert job.
  1283  	state := s1.fsm.State()
  1284  	job := mock.Job()
  1285  	job.Stop = true
  1286  	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
  1287  		Attempts: 0,
  1288  		Interval: 0 * time.Second,
  1289  	}
  1290  	err := state.UpsertJob(1000, job)
  1291  	if err != nil {
  1292  		t.Fatalf("err: %v", err)
  1293  	}
  1294  
  1295  	// Insert two complete evals
  1296  	eval := mock.Eval()
  1297  	eval.JobID = job.ID
  1298  	eval.Status = structs.EvalStatusComplete
  1299  
  1300  	eval2 := mock.Eval()
  1301  	eval2.JobID = job.ID
  1302  	eval2.Status = structs.EvalStatusComplete
  1303  
  1304  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
  1305  	if err != nil {
  1306  		t.Fatalf("err: %v", err)
  1307  	}
  1308  
  1309  	// Insert one complete alloc
  1310  	alloc := mock.Alloc()
  1311  	alloc.JobID = job.ID
  1312  	alloc.EvalID = eval.ID
  1313  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
  1314  	alloc.TaskGroup = job.TaskGroups[0].Name
  1315  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
  1316  	if err != nil {
  1317  		t.Fatalf("err: %v", err)
  1318  	}
  1319  
  1320  	// Update the time tables to make this work
  1321  	tt := s1.fsm.TimeTable()
  1322  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
  1323  
  1324  	// Create a core scheduler
  1325  	snap, err := state.Snapshot()
  1326  	if err != nil {
  1327  		t.Fatalf("err: %v", err)
  1328  	}
  1329  	core := NewCoreScheduler(s1, snap)
  1330  
  1331  	// Attempt the GC
  1332  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
  1333  	err = core.Process(gc)
  1334  	if err != nil {
  1335  		t.Fatalf("err: %v", err)
  1336  	}
  1337  
  1338  	// Shouldn't still exist
  1339  	ws := memdb.NewWatchSet()
  1340  	out, err := state.JobByID(ws, job.Namespace, job.ID)
  1341  	if err != nil {
  1342  		t.Fatalf("err: %v", err)
  1343  	}
  1344  	if out != nil {
  1345  		t.Fatalf("bad: %v", out)
  1346  	}
  1347  
  1348  	outE, err := state.EvalByID(ws, eval.ID)
  1349  	if err != nil {
  1350  		t.Fatalf("err: %v", err)
  1351  	}
  1352  	if outE != nil {
  1353  		t.Fatalf("bad: %v", outE)
  1354  	}
  1355  
  1356  	outE2, err := state.EvalByID(ws, eval2.ID)
  1357  	if err != nil {
  1358  		t.Fatalf("err: %v", err)
  1359  	}
  1360  	if outE2 != nil {
  1361  		t.Fatalf("bad: %v", outE2)
  1362  	}
  1363  
  1364  	outA, err := state.AllocByID(ws, alloc.ID)
  1365  	if err != nil {
  1366  		t.Fatalf("err: %v", err)
  1367  	}
  1368  	if outA != nil {
  1369  		t.Fatalf("bad: %v", outA)
  1370  	}
  1371  }
  1372  
  1373  func TestCoreScheduler_JobGC_Force(t *testing.T) {
  1374  	t.Parallel()
  1375  	for _, withAcl := range []bool{false, true} {
  1376  		t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
  1377  			var server *Server
  1378  			if withAcl {
  1379  				server, _ = TestACLServer(t, nil)
  1380  			} else {
  1381  				server = TestServer(t, nil)
  1382  			}
  1383  			defer server.Shutdown()
  1384  			testutil.WaitForLeader(t, server.RPC)
  1385  
  1386  			// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1387  			server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1388  
  1389  			// Insert job.
  1390  			state := server.fsm.State()
  1391  			job := mock.Job()
  1392  			job.Type = structs.JobTypeBatch
  1393  			job.Status = structs.JobStatusDead
  1394  			err := state.UpsertJob(1000, job)
  1395  			if err != nil {
  1396  				t.Fatalf("err: %v", err)
  1397  			}
  1398  
  1399  			// Insert a terminal eval
  1400  			eval := mock.Eval()
  1401  			eval.JobID = job.ID
  1402  			eval.Status = structs.EvalStatusComplete
  1403  			err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
  1404  			if err != nil {
  1405  				t.Fatalf("err: %v", err)
  1406  			}
  1407  
  1408  			// Create a core scheduler
  1409  			snap, err := state.Snapshot()
  1410  			if err != nil {
  1411  				t.Fatalf("err: %v", err)
  1412  			}
  1413  			core := NewCoreScheduler(server, snap)
  1414  
  1415  			// Attempt the GC
  1416  			gc := server.coreJobEval(structs.CoreJobForceGC, 1002)
  1417  			err = core.Process(gc)
  1418  			if err != nil {
  1419  				t.Fatalf("err: %v", err)
  1420  			}
  1421  
  1422  			// Shouldn't still exist
  1423  			ws := memdb.NewWatchSet()
  1424  			out, err := state.JobByID(ws, job.Namespace, job.ID)
  1425  			if err != nil {
  1426  				t.Fatalf("err: %v", err)
  1427  			}
  1428  			if out != nil {
  1429  				t.Fatalf("bad: %v", out)
  1430  			}
  1431  
  1432  			outE, err := state.EvalByID(ws, eval.ID)
  1433  			if err != nil {
  1434  				t.Fatalf("err: %v", err)
  1435  			}
  1436  			if outE != nil {
  1437  				t.Fatalf("bad: %v", outE)
  1438  			}
  1439  		})
  1440  	}
  1441  }
  1442  
  1443  // This test ensures parameterized jobs only get gc'd when stopped
  1444  func TestCoreScheduler_JobGC_Parameterized(t *testing.T) {
  1445  	t.Parallel()
  1446  	s1 := TestServer(t, nil)
  1447  	defer s1.Shutdown()
  1448  	testutil.WaitForLeader(t, s1.RPC)
  1449  
  1450  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1451  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1452  
  1453  	// Insert a parameterized job.
  1454  	state := s1.fsm.State()
  1455  	job := mock.Job()
  1456  	job.Type = structs.JobTypeBatch
  1457  	job.Status = structs.JobStatusRunning
  1458  	job.ParameterizedJob = &structs.ParameterizedJobConfig{
  1459  		Payload: structs.DispatchPayloadRequired,
  1460  	}
  1461  	err := state.UpsertJob(1000, job)
  1462  	if err != nil {
  1463  		t.Fatalf("err: %v", err)
  1464  	}
  1465  
  1466  	// Create a core scheduler
  1467  	snap, err := state.Snapshot()
  1468  	if err != nil {
  1469  		t.Fatalf("err: %v", err)
  1470  	}
  1471  	core := NewCoreScheduler(s1, snap)
  1472  
  1473  	// Attempt the GC
  1474  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
  1475  	err = core.Process(gc)
  1476  	if err != nil {
  1477  		t.Fatalf("err: %v", err)
  1478  	}
  1479  
  1480  	// Should still exist
  1481  	ws := memdb.NewWatchSet()
  1482  	out, err := state.JobByID(ws, job.Namespace, job.ID)
  1483  	if err != nil {
  1484  		t.Fatalf("err: %v", err)
  1485  	}
  1486  	if out == nil {
  1487  		t.Fatalf("bad: %v", out)
  1488  	}
  1489  
  1490  	// Mark the job as stopped and try again
  1491  	job2 := job.Copy()
  1492  	job2.Stop = true
  1493  	err = state.UpsertJob(2000, job2)
  1494  	if err != nil {
  1495  		t.Fatalf("err: %v", err)
  1496  	}
  1497  
  1498  	// Create a core scheduler
  1499  	snap, err = state.Snapshot()
  1500  	if err != nil {
  1501  		t.Fatalf("err: %v", err)
  1502  	}
  1503  	core = NewCoreScheduler(s1, snap)
  1504  
  1505  	// Attempt the GC
  1506  	gc = s1.coreJobEval(structs.CoreJobForceGC, 2002)
  1507  	err = core.Process(gc)
  1508  	if err != nil {
  1509  		t.Fatalf("err: %v", err)
  1510  	}
  1511  
  1512  	// Should not exist
  1513  	out, err = state.JobByID(ws, job.Namespace, job.ID)
  1514  	if err != nil {
  1515  		t.Fatalf("err: %v", err)
  1516  	}
  1517  	if out != nil {
  1518  		t.Fatalf("bad: %+v", out)
  1519  	}
  1520  }
  1521  
  1522  // This test ensures periodic jobs don't get GCd til they are stopped
  1523  func TestCoreScheduler_JobGC_Periodic(t *testing.T) {
  1524  	t.Parallel()
  1525  
  1526  	s1 := TestServer(t, nil)
  1527  	defer s1.Shutdown()
  1528  	testutil.WaitForLeader(t, s1.RPC)
  1529  
  1530  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1531  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1532  
  1533  	// Insert a parameterized job.
  1534  	state := s1.fsm.State()
  1535  	job := mock.PeriodicJob()
  1536  	err := state.UpsertJob(1000, job)
  1537  	if err != nil {
  1538  		t.Fatalf("err: %v", err)
  1539  	}
  1540  
  1541  	// Create a core scheduler
  1542  	snap, err := state.Snapshot()
  1543  	if err != nil {
  1544  		t.Fatalf("err: %v", err)
  1545  	}
  1546  	core := NewCoreScheduler(s1, snap)
  1547  
  1548  	// Attempt the GC
  1549  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
  1550  	err = core.Process(gc)
  1551  	if err != nil {
  1552  		t.Fatalf("err: %v", err)
  1553  	}
  1554  
  1555  	// Should still exist
  1556  	ws := memdb.NewWatchSet()
  1557  	out, err := state.JobByID(ws, job.Namespace, job.ID)
  1558  	if err != nil {
  1559  		t.Fatalf("err: %v", err)
  1560  	}
  1561  	if out == nil {
  1562  		t.Fatalf("bad: %v", out)
  1563  	}
  1564  
  1565  	// Mark the job as stopped and try again
  1566  	job2 := job.Copy()
  1567  	job2.Stop = true
  1568  	err = state.UpsertJob(2000, job2)
  1569  	if err != nil {
  1570  		t.Fatalf("err: %v", err)
  1571  	}
  1572  
  1573  	// Create a core scheduler
  1574  	snap, err = state.Snapshot()
  1575  	if err != nil {
  1576  		t.Fatalf("err: %v", err)
  1577  	}
  1578  	core = NewCoreScheduler(s1, snap)
  1579  
  1580  	// Attempt the GC
  1581  	gc = s1.coreJobEval(structs.CoreJobForceGC, 2002)
  1582  	err = core.Process(gc)
  1583  	if err != nil {
  1584  		t.Fatalf("err: %v", err)
  1585  	}
  1586  
  1587  	// Should not exist
  1588  	out, err = state.JobByID(ws, job.Namespace, job.ID)
  1589  	if err != nil {
  1590  		t.Fatalf("err: %v", err)
  1591  	}
  1592  	if out != nil {
  1593  		t.Fatalf("bad: %+v", out)
  1594  	}
  1595  }
  1596  
  1597  func TestCoreScheduler_DeploymentGC(t *testing.T) {
  1598  	t.Parallel()
  1599  	s1 := TestServer(t, nil)
  1600  	defer s1.Shutdown()
  1601  	testutil.WaitForLeader(t, s1.RPC)
  1602  	assert := assert.New(t)
  1603  
  1604  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1605  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1606  
  1607  	// Insert an active, terminal, and terminal with allocations edeployment
  1608  	state := s1.fsm.State()
  1609  	d1, d2, d3 := mock.Deployment(), mock.Deployment(), mock.Deployment()
  1610  	d1.Status = structs.DeploymentStatusFailed
  1611  	d3.Status = structs.DeploymentStatusSuccessful
  1612  	assert.Nil(state.UpsertDeployment(1000, d1), "UpsertDeployment")
  1613  	assert.Nil(state.UpsertDeployment(1001, d2), "UpsertDeployment")
  1614  	assert.Nil(state.UpsertDeployment(1002, d3), "UpsertDeployment")
  1615  
  1616  	a := mock.Alloc()
  1617  	a.JobID = d3.JobID
  1618  	a.DeploymentID = d3.ID
  1619  	assert.Nil(state.UpsertAllocs(1003, []*structs.Allocation{a}), "UpsertAllocs")
  1620  
  1621  	// Update the time tables to make this work
  1622  	tt := s1.fsm.TimeTable()
  1623  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.DeploymentGCThreshold))
  1624  
  1625  	// Create a core scheduler
  1626  	snap, err := state.Snapshot()
  1627  	assert.Nil(err, "Snapshot")
  1628  	core := NewCoreScheduler(s1, snap)
  1629  
  1630  	// Attempt the GC
  1631  	gc := s1.coreJobEval(structs.CoreJobDeploymentGC, 2000)
  1632  	assert.Nil(core.Process(gc), "Process GC")
  1633  
  1634  	// Should be gone
  1635  	ws := memdb.NewWatchSet()
  1636  	out, err := state.DeploymentByID(ws, d1.ID)
  1637  	assert.Nil(err, "DeploymentByID")
  1638  	assert.Nil(out, "Terminal Deployment")
  1639  	out2, err := state.DeploymentByID(ws, d2.ID)
  1640  	assert.Nil(err, "DeploymentByID")
  1641  	assert.NotNil(out2, "Active Deployment")
  1642  	out3, err := state.DeploymentByID(ws, d3.ID)
  1643  	assert.Nil(err, "DeploymentByID")
  1644  	assert.NotNil(out3, "Terminal Deployment With Allocs")
  1645  }
  1646  
  1647  func TestCoreScheduler_DeploymentGC_Force(t *testing.T) {
  1648  	t.Parallel()
  1649  	for _, withAcl := range []bool{false, true} {
  1650  		t.Run(fmt.Sprintf("with acl %v", withAcl), func(t *testing.T) {
  1651  			var server *Server
  1652  			if withAcl {
  1653  				server, _ = TestACLServer(t, nil)
  1654  			} else {
  1655  				server = TestServer(t, nil)
  1656  			}
  1657  			defer server.Shutdown()
  1658  			testutil.WaitForLeader(t, server.RPC)
  1659  			assert := assert.New(t)
  1660  
  1661  			// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1662  			server.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1663  
  1664  			// Insert terminal and active deployment
  1665  			state := server.fsm.State()
  1666  			d1, d2 := mock.Deployment(), mock.Deployment()
  1667  			d1.Status = structs.DeploymentStatusFailed
  1668  			assert.Nil(state.UpsertDeployment(1000, d1), "UpsertDeployment")
  1669  			assert.Nil(state.UpsertDeployment(1001, d2), "UpsertDeployment")
  1670  
  1671  			// Create a core scheduler
  1672  			snap, err := state.Snapshot()
  1673  			assert.Nil(err, "Snapshot")
  1674  			core := NewCoreScheduler(server, snap)
  1675  
  1676  			// Attempt the GC
  1677  			gc := server.coreJobEval(structs.CoreJobForceGC, 1000)
  1678  			assert.Nil(core.Process(gc), "Process Force GC")
  1679  
  1680  			// Should be gone
  1681  			ws := memdb.NewWatchSet()
  1682  			out, err := state.DeploymentByID(ws, d1.ID)
  1683  			assert.Nil(err, "DeploymentByID")
  1684  			assert.Nil(out, "Terminal Deployment")
  1685  			out2, err := state.DeploymentByID(ws, d2.ID)
  1686  			assert.Nil(err, "DeploymentByID")
  1687  			assert.NotNil(out2, "Active Deployment")
  1688  		})
  1689  	}
  1690  }
  1691  
  1692  func TestCoreScheduler_PartitionEvalReap(t *testing.T) {
  1693  	t.Parallel()
  1694  	s1 := TestServer(t, nil)
  1695  	defer s1.Shutdown()
  1696  	testutil.WaitForLeader(t, s1.RPC)
  1697  
  1698  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1699  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1700  
  1701  	// Create a core scheduler
  1702  	snap, err := s1.fsm.State().Snapshot()
  1703  	if err != nil {
  1704  		t.Fatalf("err: %v", err)
  1705  	}
  1706  	core := NewCoreScheduler(s1, snap)
  1707  
  1708  	// Set the max ids per reap to something lower.
  1709  	maxIdsPerReap = 2
  1710  
  1711  	evals := []string{"a", "b", "c"}
  1712  	allocs := []string{"1", "2", "3"}
  1713  	requests := core.(*CoreScheduler).partitionEvalReap(evals, allocs)
  1714  	if len(requests) != 3 {
  1715  		t.Fatalf("Expected 3 requests got: %v", requests)
  1716  	}
  1717  
  1718  	first := requests[0]
  1719  	if len(first.Allocs) != 2 && len(first.Evals) != 0 {
  1720  		t.Fatalf("Unexpected first request: %v", first)
  1721  	}
  1722  
  1723  	second := requests[1]
  1724  	if len(second.Allocs) != 1 && len(second.Evals) != 1 {
  1725  		t.Fatalf("Unexpected second request: %v", second)
  1726  	}
  1727  
  1728  	third := requests[2]
  1729  	if len(third.Allocs) != 0 && len(third.Evals) != 2 {
  1730  		t.Fatalf("Unexpected third request: %v", third)
  1731  	}
  1732  }
  1733  
  1734  func TestCoreScheduler_PartitionDeploymentReap(t *testing.T) {
  1735  	t.Parallel()
  1736  	s1 := TestServer(t, nil)
  1737  	defer s1.Shutdown()
  1738  	testutil.WaitForLeader(t, s1.RPC)
  1739  
  1740  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
  1741  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
  1742  
  1743  	// Create a core scheduler
  1744  	snap, err := s1.fsm.State().Snapshot()
  1745  	if err != nil {
  1746  		t.Fatalf("err: %v", err)
  1747  	}
  1748  	core := NewCoreScheduler(s1, snap)
  1749  
  1750  	// Set the max ids per reap to something lower.
  1751  	maxIdsPerReap = 2
  1752  
  1753  	deployments := []string{"a", "b", "c"}
  1754  	requests := core.(*CoreScheduler).partitionDeploymentReap(deployments)
  1755  	if len(requests) != 2 {
  1756  		t.Fatalf("Expected 2 requests got: %v", requests)
  1757  	}
  1758  
  1759  	first := requests[0]
  1760  	if len(first.Deployments) != 2 {
  1761  		t.Fatalf("Unexpected first request: %v", first)
  1762  	}
  1763  
  1764  	second := requests[1]
  1765  	if len(second.Deployments) != 1 {
  1766  		t.Fatalf("Unexpected second request: %v", second)
  1767  	}
  1768  }
  1769  
  1770  // Tests various scenarios when allocations are eligible to be GCed
  1771  func TestAllocation_GCEligible(t *testing.T) {
  1772  	type testCase struct {
  1773  		Desc               string
  1774  		GCTime             time.Time
  1775  		ClientStatus       string
  1776  		DesiredStatus      string
  1777  		JobStatus          string
  1778  		JobStop            bool
  1779  		ModifyIndex        uint64
  1780  		NextAllocID        string
  1781  		ReschedulePolicy   *structs.ReschedulePolicy
  1782  		RescheduleTrackers []*structs.RescheduleEvent
  1783  		ThresholdIndex     uint64
  1784  		ShouldGC           bool
  1785  	}
  1786  
  1787  	fail := time.Now()
  1788  
  1789  	harness := []testCase{
  1790  		{
  1791  			Desc:           "GC when non terminal",
  1792  			ClientStatus:   structs.AllocClientStatusPending,
  1793  			DesiredStatus:  structs.AllocDesiredStatusRun,
  1794  			GCTime:         fail,
  1795  			ModifyIndex:    90,
  1796  			ThresholdIndex: 90,
  1797  			ShouldGC:       false,
  1798  		},
  1799  		{
  1800  			Desc:           "GC when non terminal and job stopped",
  1801  			ClientStatus:   structs.AllocClientStatusPending,
  1802  			DesiredStatus:  structs.AllocDesiredStatusRun,
  1803  			JobStop:        true,
  1804  			GCTime:         fail,
  1805  			ModifyIndex:    90,
  1806  			ThresholdIndex: 90,
  1807  			ShouldGC:       false,
  1808  		},
  1809  		{
  1810  			Desc:           "GC when non terminal and job dead",
  1811  			ClientStatus:   structs.AllocClientStatusPending,
  1812  			DesiredStatus:  structs.AllocDesiredStatusRun,
  1813  			JobStatus:      structs.JobStatusDead,
  1814  			GCTime:         fail,
  1815  			ModifyIndex:    90,
  1816  			ThresholdIndex: 90,
  1817  			ShouldGC:       false,
  1818  		},
  1819  		{
  1820  			Desc:             "GC when threshold not met",
  1821  			ClientStatus:     structs.AllocClientStatusComplete,
  1822  			DesiredStatus:    structs.AllocDesiredStatusStop,
  1823  			GCTime:           fail,
  1824  			ModifyIndex:      100,
  1825  			ThresholdIndex:   90,
  1826  			ReschedulePolicy: nil,
  1827  			ShouldGC:         false,
  1828  		},
  1829  		{
  1830  			Desc:             "GC when no reschedule policy",
  1831  			ClientStatus:     structs.AllocClientStatusFailed,
  1832  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1833  			GCTime:           fail,
  1834  			ReschedulePolicy: nil,
  1835  			ModifyIndex:      90,
  1836  			ThresholdIndex:   90,
  1837  			ShouldGC:         true,
  1838  		},
  1839  		{
  1840  			Desc:             "GC when empty policy",
  1841  			ClientStatus:     structs.AllocClientStatusFailed,
  1842  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1843  			GCTime:           fail,
  1844  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 0, Interval: 0 * time.Minute},
  1845  			ModifyIndex:      90,
  1846  			ThresholdIndex:   90,
  1847  			ShouldGC:         true,
  1848  		},
  1849  		{
  1850  			Desc:             "GC with no previous attempts",
  1851  			ClientStatus:     structs.AllocClientStatusFailed,
  1852  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1853  			GCTime:           fail,
  1854  			ModifyIndex:      90,
  1855  			ThresholdIndex:   90,
  1856  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 1, Interval: 1 * time.Minute},
  1857  			ShouldGC:         false,
  1858  		},
  1859  		{
  1860  			Desc:             "GC with prev reschedule attempt within interval",
  1861  			ClientStatus:     structs.AllocClientStatusFailed,
  1862  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1863  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 2, Interval: 30 * time.Minute},
  1864  			GCTime:           fail,
  1865  			ModifyIndex:      90,
  1866  			ThresholdIndex:   90,
  1867  			RescheduleTrackers: []*structs.RescheduleEvent{
  1868  				{
  1869  					RescheduleTime: fail.Add(-5 * time.Minute).UTC().UnixNano(),
  1870  				},
  1871  			},
  1872  			ShouldGC: false,
  1873  		},
  1874  		{
  1875  			Desc:             "GC with prev reschedule attempt outside interval",
  1876  			ClientStatus:     structs.AllocClientStatusFailed,
  1877  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1878  			GCTime:           fail,
  1879  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
  1880  			RescheduleTrackers: []*structs.RescheduleEvent{
  1881  				{
  1882  					RescheduleTime: fail.Add(-45 * time.Minute).UTC().UnixNano(),
  1883  				},
  1884  				{
  1885  					RescheduleTime: fail.Add(-60 * time.Minute).UTC().UnixNano(),
  1886  				},
  1887  			},
  1888  			ShouldGC: true,
  1889  		},
  1890  		{
  1891  			Desc:             "GC when next alloc id is set",
  1892  			ClientStatus:     structs.AllocClientStatusFailed,
  1893  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1894  			GCTime:           fail,
  1895  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
  1896  			RescheduleTrackers: []*structs.RescheduleEvent{
  1897  				{
  1898  					RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
  1899  				},
  1900  			},
  1901  			NextAllocID: uuid.Generate(),
  1902  			ShouldGC:    true,
  1903  		},
  1904  		{
  1905  			Desc:             "GC when job is stopped",
  1906  			ClientStatus:     structs.AllocClientStatusFailed,
  1907  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1908  			GCTime:           fail,
  1909  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
  1910  			RescheduleTrackers: []*structs.RescheduleEvent{
  1911  				{
  1912  					RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
  1913  				},
  1914  			},
  1915  			JobStop:  true,
  1916  			ShouldGC: true,
  1917  		},
  1918  		{
  1919  			Desc:             "GC when job status is dead",
  1920  			ClientStatus:     structs.AllocClientStatusFailed,
  1921  			DesiredStatus:    structs.AllocDesiredStatusRun,
  1922  			GCTime:           fail,
  1923  			ReschedulePolicy: &structs.ReschedulePolicy{Attempts: 5, Interval: 30 * time.Minute},
  1924  			RescheduleTrackers: []*structs.RescheduleEvent{
  1925  				{
  1926  					RescheduleTime: fail.Add(-3 * time.Minute).UTC().UnixNano(),
  1927  				},
  1928  			},
  1929  			JobStatus: structs.JobStatusDead,
  1930  			ShouldGC:  true,
  1931  		},
  1932  	}
  1933  
  1934  	for _, tc := range harness {
  1935  		alloc := &structs.Allocation{}
  1936  		alloc.ModifyIndex = tc.ModifyIndex
  1937  		alloc.DesiredStatus = tc.DesiredStatus
  1938  		alloc.ClientStatus = tc.ClientStatus
  1939  		alloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.RescheduleTrackers}
  1940  		alloc.NextAllocation = tc.NextAllocID
  1941  		job := mock.Job()
  1942  		alloc.TaskGroup = job.TaskGroups[0].Name
  1943  		job.TaskGroups[0].ReschedulePolicy = tc.ReschedulePolicy
  1944  		if tc.JobStatus != "" {
  1945  			job.Status = tc.JobStatus
  1946  		}
  1947  		job.Stop = tc.JobStop
  1948  
  1949  		t.Run(tc.Desc, func(t *testing.T) {
  1950  			if got := allocGCEligible(alloc, job, tc.GCTime, tc.ThresholdIndex); got != tc.ShouldGC {
  1951  				t.Fatalf("expected %v but got %v", tc.ShouldGC, got)
  1952  			}
  1953  		})
  1954  
  1955  	}
  1956  
  1957  	// Verify nil job
  1958  	require := require.New(t)
  1959  	alloc := mock.Alloc()
  1960  	alloc.ClientStatus = structs.AllocClientStatusComplete
  1961  	require.True(allocGCEligible(alloc, nil, time.Now(), 1000))
  1962  }