github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/core_sched_test.go (about)

     1  package nomad
     2  
     3  import (
     4  	"testing"
     5  	"time"
     6  
     7  	"github.com/hashicorp/nomad/nomad/mock"
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  	"github.com/hashicorp/nomad/testutil"
    10  )
    11  
    12  func TestCoreScheduler_EvalGC(t *testing.T) {
    13  	s1 := testServer(t, nil)
    14  	defer s1.Shutdown()
    15  	testutil.WaitForLeader(t, s1.RPC)
    16  
    17  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
    18  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
    19  
    20  	// Insert "dead" eval
    21  	state := s1.fsm.State()
    22  	eval := mock.Eval()
    23  	eval.Status = structs.EvalStatusFailed
    24  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
    25  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
    26  	if err != nil {
    27  		t.Fatalf("err: %v", err)
    28  	}
    29  
    30  	// Insert "dead" alloc
    31  	alloc := mock.Alloc()
    32  	alloc.EvalID = eval.ID
    33  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
    34  	alloc.JobID = eval.JobID
    35  
    36  	// Insert "lost" alloc
    37  	alloc2 := mock.Alloc()
    38  	alloc2.EvalID = eval.ID
    39  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
    40  	alloc2.ClientStatus = structs.AllocClientStatusLost
    41  	alloc2.JobID = eval.JobID
    42  	err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2})
    43  	if err != nil {
    44  		t.Fatalf("err: %v", err)
    45  	}
    46  
    47  	// Update the time tables to make this work
    48  	tt := s1.fsm.TimeTable()
    49  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
    50  
    51  	// Create a core scheduler
    52  	snap, err := state.Snapshot()
    53  	if err != nil {
    54  		t.Fatalf("err: %v", err)
    55  	}
    56  	core := NewCoreScheduler(s1, snap)
    57  
    58  	// Attempt the GC
    59  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
    60  	err = core.Process(gc)
    61  	if err != nil {
    62  		t.Fatalf("err: %v", err)
    63  	}
    64  
    65  	// Should be gone
    66  	out, err := state.EvalByID(eval.ID)
    67  	if err != nil {
    68  		t.Fatalf("err: %v", err)
    69  	}
    70  	if out != nil {
    71  		t.Fatalf("bad: %v", out)
    72  	}
    73  
    74  	outA, err := state.AllocByID(alloc.ID)
    75  	if err != nil {
    76  		t.Fatalf("err: %v", err)
    77  	}
    78  	if outA != nil {
    79  		t.Fatalf("bad: %v", outA)
    80  	}
    81  
    82  	outA2, err := state.AllocByID(alloc2.ID)
    83  	if err != nil {
    84  		t.Fatalf("err: %v", err)
    85  	}
    86  	if outA2 != nil {
    87  		t.Fatalf("bad: %v", outA2)
    88  	}
    89  }
    90  
    91  // An EvalGC should never reap a batch job
    92  func TestCoreScheduler_EvalGC_Batch(t *testing.T) {
    93  	s1 := testServer(t, nil)
    94  	defer s1.Shutdown()
    95  	testutil.WaitForLeader(t, s1.RPC)
    96  
    97  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
    98  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
    99  
   100  	// Insert a "dead" job
   101  	state := s1.fsm.State()
   102  	job := mock.Job()
   103  	job.Type = structs.JobTypeBatch
   104  	job.Status = structs.JobStatusDead
   105  	err := state.UpsertJob(1000, job)
   106  	if err != nil {
   107  		t.Fatalf("err: %v", err)
   108  	}
   109  
   110  	// Insert "complete" eval
   111  	eval := mock.Eval()
   112  	eval.Status = structs.EvalStatusComplete
   113  	eval.Type = structs.JobTypeBatch
   114  	eval.JobID = job.ID
   115  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
   116  	if err != nil {
   117  		t.Fatalf("err: %v", err)
   118  	}
   119  
   120  	// Insert "failed" alloc
   121  	alloc := mock.Alloc()
   122  	alloc.JobID = job.ID
   123  	alloc.EvalID = eval.ID
   124  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   125  
   126  	// Insert "lost" alloc
   127  	alloc2 := mock.Alloc()
   128  	alloc2.JobID = job.ID
   129  	alloc2.EvalID = eval.ID
   130  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   131  	alloc2.ClientStatus = structs.AllocClientStatusLost
   132  
   133  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   134  	if err != nil {
   135  		t.Fatalf("err: %v", err)
   136  	}
   137  
   138  	// Update the time tables to make this work
   139  	tt := s1.fsm.TimeTable()
   140  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   141  
   142  	// Create a core scheduler
   143  	snap, err := state.Snapshot()
   144  	if err != nil {
   145  		t.Fatalf("err: %v", err)
   146  	}
   147  	core := NewCoreScheduler(s1, snap)
   148  
   149  	// Attempt the GC
   150  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   151  	err = core.Process(gc)
   152  	if err != nil {
   153  		t.Fatalf("err: %v", err)
   154  	}
   155  
   156  	// Nothing should be gone
   157  	out, err := state.EvalByID(eval.ID)
   158  	if err != nil {
   159  		t.Fatalf("err: %v", err)
   160  	}
   161  	if out == nil {
   162  		t.Fatalf("bad: %v", out)
   163  	}
   164  
   165  	outA, err := state.AllocByID(alloc.ID)
   166  	if err != nil {
   167  		t.Fatalf("err: %v", err)
   168  	}
   169  	if outA == nil {
   170  		t.Fatalf("bad: %v", outA)
   171  	}
   172  
   173  	outA2, err := state.AllocByID(alloc2.ID)
   174  	if err != nil {
   175  		t.Fatalf("err: %v", err)
   176  	}
   177  	if outA2 == nil {
   178  		t.Fatalf("bad: %v", outA2)
   179  	}
   180  
   181  	outB, err := state.JobByID(job.ID)
   182  	if err != nil {
   183  		t.Fatalf("err: %v", err)
   184  	}
   185  	if outB == nil {
   186  		t.Fatalf("bad: %v", outB)
   187  	}
   188  }
   189  
   190  func TestCoreScheduler_EvalGC_Partial(t *testing.T) {
   191  	s1 := testServer(t, nil)
   192  	defer s1.Shutdown()
   193  	testutil.WaitForLeader(t, s1.RPC)
   194  
   195  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   196  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   197  
   198  	// Insert "dead" eval
   199  	state := s1.fsm.State()
   200  	eval := mock.Eval()
   201  	eval.Status = structs.EvalStatusComplete
   202  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   203  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   204  	if err != nil {
   205  		t.Fatalf("err: %v", err)
   206  	}
   207  
   208  	// Insert "dead" alloc
   209  	alloc := mock.Alloc()
   210  	alloc.EvalID = eval.ID
   211  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   212  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   213  
   214  	// Insert "lost" alloc
   215  	alloc2 := mock.Alloc()
   216  	alloc2.JobID = alloc.JobID
   217  	alloc2.EvalID = eval.ID
   218  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   219  	alloc2.ClientStatus = structs.AllocClientStatusLost
   220  
   221  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   222  	if err != nil {
   223  		t.Fatalf("err: %v", err)
   224  	}
   225  
   226  	// Insert "running" alloc
   227  	alloc3 := mock.Alloc()
   228  	alloc3.EvalID = eval.ID
   229  	state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID))
   230  	err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3})
   231  	if err != nil {
   232  		t.Fatalf("err: %v", err)
   233  	}
   234  
   235  	// Update the time tables to make this work
   236  	tt := s1.fsm.TimeTable()
   237  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold))
   238  
   239  	// Create a core scheduler
   240  	snap, err := state.Snapshot()
   241  	if err != nil {
   242  		t.Fatalf("err: %v", err)
   243  	}
   244  	core := NewCoreScheduler(s1, snap)
   245  
   246  	// Attempt the GC
   247  	gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000)
   248  	err = core.Process(gc)
   249  	if err != nil {
   250  		t.Fatalf("err: %v", err)
   251  	}
   252  
   253  	// Should not be gone
   254  	out, err := state.EvalByID(eval.ID)
   255  	if err != nil {
   256  		t.Fatalf("err: %v", err)
   257  	}
   258  	if out == nil {
   259  		t.Fatalf("bad: %v", out)
   260  	}
   261  
   262  	outA, err := state.AllocByID(alloc3.ID)
   263  	if err != nil {
   264  		t.Fatalf("err: %v", err)
   265  	}
   266  	if outA == nil {
   267  		t.Fatalf("bad: %v", outA)
   268  	}
   269  
   270  	// Should be gone
   271  	outB, err := state.AllocByID(alloc.ID)
   272  	if err != nil {
   273  		t.Fatalf("err: %v", err)
   274  	}
   275  	if outB != nil {
   276  		t.Fatalf("bad: %v", outB)
   277  	}
   278  
   279  	outC, err := state.AllocByID(alloc2.ID)
   280  	if err != nil {
   281  		t.Fatalf("err: %v", err)
   282  	}
   283  	if outC != nil {
   284  		t.Fatalf("bad: %v", outC)
   285  	}
   286  }
   287  
   288  func TestCoreScheduler_EvalGC_Force(t *testing.T) {
   289  	s1 := testServer(t, nil)
   290  	defer s1.Shutdown()
   291  	testutil.WaitForLeader(t, s1.RPC)
   292  
   293  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   294  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   295  
   296  	// Insert "dead" eval
   297  	state := s1.fsm.State()
   298  	eval := mock.Eval()
   299  	eval.Status = structs.EvalStatusFailed
   300  	state.UpsertJobSummary(999, mock.JobSummary(eval.JobID))
   301  	err := state.UpsertEvals(1000, []*structs.Evaluation{eval})
   302  	if err != nil {
   303  		t.Fatalf("err: %v", err)
   304  	}
   305  
   306  	// Insert "dead" alloc
   307  	alloc := mock.Alloc()
   308  	alloc.EvalID = eval.ID
   309  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   310  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   311  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc})
   312  	if err != nil {
   313  		t.Fatalf("err: %v", err)
   314  	}
   315  
   316  	// Create a core scheduler
   317  	snap, err := state.Snapshot()
   318  	if err != nil {
   319  		t.Fatalf("err: %v", err)
   320  	}
   321  	core := NewCoreScheduler(s1, snap)
   322  
   323  	// Attempt the GC
   324  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
   325  	err = core.Process(gc)
   326  	if err != nil {
   327  		t.Fatalf("err: %v", err)
   328  	}
   329  
   330  	// Should be gone
   331  	out, err := state.EvalByID(eval.ID)
   332  	if err != nil {
   333  		t.Fatalf("err: %v", err)
   334  	}
   335  	if out != nil {
   336  		t.Fatalf("bad: %v", out)
   337  	}
   338  
   339  	outA, err := state.AllocByID(alloc.ID)
   340  	if err != nil {
   341  		t.Fatalf("err: %v", err)
   342  	}
   343  	if outA != nil {
   344  		t.Fatalf("bad: %v", outA)
   345  	}
   346  }
   347  
   348  func TestCoreScheduler_NodeGC(t *testing.T) {
   349  	s1 := testServer(t, nil)
   350  	defer s1.Shutdown()
   351  	testutil.WaitForLeader(t, s1.RPC)
   352  
   353  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   354  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   355  
   356  	// Insert "dead" node
   357  	state := s1.fsm.State()
   358  	node := mock.Node()
   359  	node.Status = structs.NodeStatusDown
   360  	err := state.UpsertNode(1000, node)
   361  	if err != nil {
   362  		t.Fatalf("err: %v", err)
   363  	}
   364  
   365  	// Update the time tables to make this work
   366  	tt := s1.fsm.TimeTable()
   367  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
   368  
   369  	// Create a core scheduler
   370  	snap, err := state.Snapshot()
   371  	if err != nil {
   372  		t.Fatalf("err: %v", err)
   373  	}
   374  	core := NewCoreScheduler(s1, snap)
   375  
   376  	// Attempt the GC
   377  	gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
   378  	err = core.Process(gc)
   379  	if err != nil {
   380  		t.Fatalf("err: %v", err)
   381  	}
   382  
   383  	// Should be gone
   384  	out, err := state.NodeByID(node.ID)
   385  	if err != nil {
   386  		t.Fatalf("err: %v", err)
   387  	}
   388  	if out != nil {
   389  		t.Fatalf("bad: %v", out)
   390  	}
   391  }
   392  
   393  func TestCoreScheduler_NodeGC_TerminalAllocs(t *testing.T) {
   394  	s1 := testServer(t, nil)
   395  	defer s1.Shutdown()
   396  	testutil.WaitForLeader(t, s1.RPC)
   397  
   398  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   399  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   400  
   401  	// Insert "dead" node
   402  	state := s1.fsm.State()
   403  	node := mock.Node()
   404  	node.Status = structs.NodeStatusDown
   405  	err := state.UpsertNode(1000, node)
   406  	if err != nil {
   407  		t.Fatalf("err: %v", err)
   408  	}
   409  
   410  	// Insert a terminal alloc on that node
   411  	alloc := mock.Alloc()
   412  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   413  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   414  	if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
   415  		t.Fatalf("err: %v", err)
   416  	}
   417  
   418  	// Update the time tables to make this work
   419  	tt := s1.fsm.TimeTable()
   420  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
   421  
   422  	// Create a core scheduler
   423  	snap, err := state.Snapshot()
   424  	if err != nil {
   425  		t.Fatalf("err: %v", err)
   426  	}
   427  	core := NewCoreScheduler(s1, snap)
   428  
   429  	// Attempt the GC
   430  	gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
   431  	err = core.Process(gc)
   432  	if err != nil {
   433  		t.Fatalf("err: %v", err)
   434  	}
   435  
   436  	// Should be gone
   437  	out, err := state.NodeByID(node.ID)
   438  	if err != nil {
   439  		t.Fatalf("err: %v", err)
   440  	}
   441  	if out != nil {
   442  		t.Fatalf("bad: %v", out)
   443  	}
   444  }
   445  
   446  func TestCoreScheduler_NodeGC_RunningAllocs(t *testing.T) {
   447  	s1 := testServer(t, nil)
   448  	defer s1.Shutdown()
   449  	testutil.WaitForLeader(t, s1.RPC)
   450  
   451  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   452  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   453  
   454  	// Insert "dead" node
   455  	state := s1.fsm.State()
   456  	node := mock.Node()
   457  	node.Status = structs.NodeStatusDown
   458  	err := state.UpsertNode(1000, node)
   459  	if err != nil {
   460  		t.Fatalf("err: %v", err)
   461  	}
   462  
   463  	// Insert a running alloc on that node
   464  	alloc := mock.Alloc()
   465  	alloc.NodeID = node.ID
   466  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
   467  	alloc.ClientStatus = structs.AllocClientStatusRunning
   468  	state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID))
   469  	if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil {
   470  		t.Fatalf("err: %v", err)
   471  	}
   472  
   473  	// Update the time tables to make this work
   474  	tt := s1.fsm.TimeTable()
   475  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold))
   476  
   477  	// Create a core scheduler
   478  	snap, err := state.Snapshot()
   479  	if err != nil {
   480  		t.Fatalf("err: %v", err)
   481  	}
   482  	core := NewCoreScheduler(s1, snap)
   483  
   484  	// Attempt the GC
   485  	gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000)
   486  	err = core.Process(gc)
   487  	if err != nil {
   488  		t.Fatalf("err: %v", err)
   489  	}
   490  
   491  	// Should still be here
   492  	out, err := state.NodeByID(node.ID)
   493  	if err != nil {
   494  		t.Fatalf("err: %v", err)
   495  	}
   496  	if out == nil {
   497  		t.Fatalf("bad: %v", out)
   498  	}
   499  }
   500  
   501  func TestCoreScheduler_NodeGC_Force(t *testing.T) {
   502  	s1 := testServer(t, nil)
   503  	defer s1.Shutdown()
   504  	testutil.WaitForLeader(t, s1.RPC)
   505  
   506  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   507  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   508  
   509  	// Insert "dead" node
   510  	state := s1.fsm.State()
   511  	node := mock.Node()
   512  	node.Status = structs.NodeStatusDown
   513  	err := state.UpsertNode(1000, node)
   514  	if err != nil {
   515  		t.Fatalf("err: %v", err)
   516  	}
   517  
   518  	// Create a core scheduler
   519  	snap, err := state.Snapshot()
   520  	if err != nil {
   521  		t.Fatalf("err: %v", err)
   522  	}
   523  	core := NewCoreScheduler(s1, snap)
   524  
   525  	// Attempt the GC
   526  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1000)
   527  	err = core.Process(gc)
   528  	if err != nil {
   529  		t.Fatalf("err: %v", err)
   530  	}
   531  
   532  	// Should be gone
   533  	out, err := state.NodeByID(node.ID)
   534  	if err != nil {
   535  		t.Fatalf("err: %v", err)
   536  	}
   537  	if out != nil {
   538  		t.Fatalf("bad: %v", out)
   539  	}
   540  }
   541  
   542  func TestCoreScheduler_JobGC_OutstandingEvals(t *testing.T) {
   543  	s1 := testServer(t, nil)
   544  	defer s1.Shutdown()
   545  	testutil.WaitForLeader(t, s1.RPC)
   546  
   547  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   548  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   549  
   550  	// Insert job.
   551  	state := s1.fsm.State()
   552  	job := mock.Job()
   553  	job.Type = structs.JobTypeBatch
   554  	job.Status = structs.JobStatusDead
   555  	err := state.UpsertJob(1000, job)
   556  	if err != nil {
   557  		t.Fatalf("err: %v", err)
   558  	}
   559  
   560  	// Insert two evals, one terminal and one not
   561  	eval := mock.Eval()
   562  	eval.JobID = job.ID
   563  	eval.Status = structs.EvalStatusComplete
   564  
   565  	eval2 := mock.Eval()
   566  	eval2.JobID = job.ID
   567  	eval2.Status = structs.EvalStatusPending
   568  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
   569  	if err != nil {
   570  		t.Fatalf("err: %v", err)
   571  	}
   572  
   573  	// Update the time tables to make this work
   574  	tt := s1.fsm.TimeTable()
   575  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
   576  
   577  	// Create a core scheduler
   578  	snap, err := state.Snapshot()
   579  	if err != nil {
   580  		t.Fatalf("err: %v", err)
   581  	}
   582  	core := NewCoreScheduler(s1, snap)
   583  
   584  	// Attempt the GC
   585  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
   586  	err = core.Process(gc)
   587  	if err != nil {
   588  		t.Fatalf("err: %v", err)
   589  	}
   590  
   591  	// Should still exist
   592  	out, err := state.JobByID(job.ID)
   593  	if err != nil {
   594  		t.Fatalf("err: %v", err)
   595  	}
   596  	if out == nil {
   597  		t.Fatalf("bad: %v", out)
   598  	}
   599  
   600  	outE, err := state.EvalByID(eval.ID)
   601  	if err != nil {
   602  		t.Fatalf("err: %v", err)
   603  	}
   604  	if outE == nil {
   605  		t.Fatalf("bad: %v", outE)
   606  	}
   607  
   608  	outE2, err := state.EvalByID(eval2.ID)
   609  	if err != nil {
   610  		t.Fatalf("err: %v", err)
   611  	}
   612  	if outE2 == nil {
   613  		t.Fatalf("bad: %v", outE2)
   614  	}
   615  
   616  	// Update the second eval to be terminal
   617  	eval2.Status = structs.EvalStatusComplete
   618  	err = state.UpsertEvals(1003, []*structs.Evaluation{eval2})
   619  	if err != nil {
   620  		t.Fatalf("err: %v", err)
   621  	}
   622  
   623  	// Create a core scheduler
   624  	snap, err = state.Snapshot()
   625  	if err != nil {
   626  		t.Fatalf("err: %v", err)
   627  	}
   628  	core = NewCoreScheduler(s1, snap)
   629  
   630  	// Attempt the GC
   631  	gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
   632  	err = core.Process(gc)
   633  	if err != nil {
   634  		t.Fatalf("err: %v", err)
   635  	}
   636  
   637  	// Should not still exist
   638  	out, err = state.JobByID(job.ID)
   639  	if err != nil {
   640  		t.Fatalf("err: %v", err)
   641  	}
   642  	if out != nil {
   643  		t.Fatalf("bad: %v", out)
   644  	}
   645  
   646  	outE, err = state.EvalByID(eval.ID)
   647  	if err != nil {
   648  		t.Fatalf("err: %v", err)
   649  	}
   650  	if outE != nil {
   651  		t.Fatalf("bad: %v", outE)
   652  	}
   653  
   654  	outE2, err = state.EvalByID(eval2.ID)
   655  	if err != nil {
   656  		t.Fatalf("err: %v", err)
   657  	}
   658  	if outE2 != nil {
   659  		t.Fatalf("bad: %v", outE2)
   660  	}
   661  }
   662  
   663  func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) {
   664  	s1 := testServer(t, nil)
   665  	defer s1.Shutdown()
   666  	testutil.WaitForLeader(t, s1.RPC)
   667  
   668  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   669  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   670  
   671  	// Insert job.
   672  	state := s1.fsm.State()
   673  	job := mock.Job()
   674  	job.Type = structs.JobTypeBatch
   675  	job.Status = structs.JobStatusDead
   676  	err := state.UpsertJob(1000, job)
   677  	if err != nil {
   678  		t.Fatalf("err: %v", err)
   679  	}
   680  
   681  	// Insert an eval
   682  	eval := mock.Eval()
   683  	eval.JobID = job.ID
   684  	eval.Status = structs.EvalStatusComplete
   685  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
   686  	if err != nil {
   687  		t.Fatalf("err: %v", err)
   688  	}
   689  
   690  	// Insert two allocs, one terminal and one not
   691  	alloc := mock.Alloc()
   692  	alloc.JobID = job.ID
   693  	alloc.EvalID = eval.ID
   694  	alloc.DesiredStatus = structs.AllocDesiredStatusRun
   695  	alloc.ClientStatus = structs.AllocClientStatusComplete
   696  
   697  	alloc2 := mock.Alloc()
   698  	alloc2.JobID = job.ID
   699  	alloc2.EvalID = eval.ID
   700  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   701  	alloc2.ClientStatus = structs.AllocClientStatusRunning
   702  
   703  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   704  	if err != nil {
   705  		t.Fatalf("err: %v", err)
   706  	}
   707  
   708  	// Update the time tables to make this work
   709  	tt := s1.fsm.TimeTable()
   710  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
   711  
   712  	// Create a core scheduler
   713  	snap, err := state.Snapshot()
   714  	if err != nil {
   715  		t.Fatalf("err: %v", err)
   716  	}
   717  	core := NewCoreScheduler(s1, snap)
   718  
   719  	// Attempt the GC
   720  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
   721  	err = core.Process(gc)
   722  	if err != nil {
   723  		t.Fatalf("err: %v", err)
   724  	}
   725  
   726  	// Should still exist
   727  	out, err := state.JobByID(job.ID)
   728  	if err != nil {
   729  		t.Fatalf("err: %v", err)
   730  	}
   731  	if out == nil {
   732  		t.Fatalf("bad: %v", out)
   733  	}
   734  
   735  	outA, err := state.AllocByID(alloc.ID)
   736  	if err != nil {
   737  		t.Fatalf("err: %v", err)
   738  	}
   739  	if outA == nil {
   740  		t.Fatalf("bad: %v", outA)
   741  	}
   742  
   743  	outA2, err := state.AllocByID(alloc2.ID)
   744  	if err != nil {
   745  		t.Fatalf("err: %v", err)
   746  	}
   747  	if outA2 == nil {
   748  		t.Fatalf("bad: %v", outA2)
   749  	}
   750  
   751  	// Update the second alloc to be terminal
   752  	alloc2.ClientStatus = structs.AllocClientStatusComplete
   753  	err = state.UpsertAllocs(1003, []*structs.Allocation{alloc2})
   754  	if err != nil {
   755  		t.Fatalf("err: %v", err)
   756  	}
   757  
   758  	// Create a core scheduler
   759  	snap, err = state.Snapshot()
   760  	if err != nil {
   761  		t.Fatalf("err: %v", err)
   762  	}
   763  	core = NewCoreScheduler(s1, snap)
   764  
   765  	// Attempt the GC
   766  	gc = s1.coreJobEval(structs.CoreJobJobGC, 2000)
   767  	err = core.Process(gc)
   768  	if err != nil {
   769  		t.Fatalf("err: %v", err)
   770  	}
   771  
   772  	// Should not still exist
   773  	out, err = state.JobByID(job.ID)
   774  	if err != nil {
   775  		t.Fatalf("err: %v", err)
   776  	}
   777  	if out != nil {
   778  		t.Fatalf("bad: %v", out)
   779  	}
   780  
   781  	outA, err = state.AllocByID(alloc.ID)
   782  	if err != nil {
   783  		t.Fatalf("err: %v", err)
   784  	}
   785  	if outA != nil {
   786  		t.Fatalf("bad: %v", outA)
   787  	}
   788  
   789  	outA2, err = state.AllocByID(alloc2.ID)
   790  	if err != nil {
   791  		t.Fatalf("err: %v", err)
   792  	}
   793  	if outA2 != nil {
   794  		t.Fatalf("bad: %v", outA2)
   795  	}
   796  }
   797  
   798  // This test ensures that batch jobs are GC'd in one shot, meaning it all
   799  // allocs/evals and job or nothing
   800  func TestCoreScheduler_JobGC_OneShot(t *testing.T) {
   801  	s1 := testServer(t, nil)
   802  	defer s1.Shutdown()
   803  	testutil.WaitForLeader(t, s1.RPC)
   804  
   805  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   806  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   807  
   808  	// Insert job.
   809  	state := s1.fsm.State()
   810  	job := mock.Job()
   811  	job.Type = structs.JobTypeBatch
   812  	err := state.UpsertJob(1000, job)
   813  	if err != nil {
   814  		t.Fatalf("err: %v", err)
   815  	}
   816  
   817  	// Insert two complete evals
   818  	eval := mock.Eval()
   819  	eval.JobID = job.ID
   820  	eval.Status = structs.EvalStatusComplete
   821  
   822  	eval2 := mock.Eval()
   823  	eval2.JobID = job.ID
   824  	eval2.Status = structs.EvalStatusComplete
   825  
   826  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2})
   827  	if err != nil {
   828  		t.Fatalf("err: %v", err)
   829  	}
   830  
   831  	// Insert one complete alloc and one running on distinct evals
   832  	alloc := mock.Alloc()
   833  	alloc.JobID = job.ID
   834  	alloc.EvalID = eval.ID
   835  	alloc.DesiredStatus = structs.AllocDesiredStatusStop
   836  
   837  	alloc2 := mock.Alloc()
   838  	alloc2.JobID = job.ID
   839  	alloc2.EvalID = eval2.ID
   840  	alloc2.DesiredStatus = structs.AllocDesiredStatusRun
   841  
   842  	err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2})
   843  	if err != nil {
   844  		t.Fatalf("err: %v", err)
   845  	}
   846  
   847  	// Force the jobs state to dead
   848  	job.Status = structs.JobStatusDead
   849  
   850  	// Update the time tables to make this work
   851  	tt := s1.fsm.TimeTable()
   852  	tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold))
   853  
   854  	// Create a core scheduler
   855  	snap, err := state.Snapshot()
   856  	if err != nil {
   857  		t.Fatalf("err: %v", err)
   858  	}
   859  	core := NewCoreScheduler(s1, snap)
   860  
   861  	// Attempt the GC
   862  	gc := s1.coreJobEval(structs.CoreJobJobGC, 2000)
   863  	err = core.Process(gc)
   864  	if err != nil {
   865  		t.Fatalf("err: %v", err)
   866  	}
   867  
   868  	// Should still exist
   869  	out, err := state.JobByID(job.ID)
   870  	if err != nil {
   871  		t.Fatalf("err: %v", err)
   872  	}
   873  	if out == nil {
   874  		t.Fatalf("bad: %v", out)
   875  	}
   876  
   877  	outE, err := state.EvalByID(eval.ID)
   878  	if err != nil {
   879  		t.Fatalf("err: %v", err)
   880  	}
   881  	if outE == nil {
   882  		t.Fatalf("bad: %v", outE)
   883  	}
   884  
   885  	outE2, err := state.EvalByID(eval2.ID)
   886  	if err != nil {
   887  		t.Fatalf("err: %v", err)
   888  	}
   889  	if outE2 == nil {
   890  		t.Fatalf("bad: %v", outE2)
   891  	}
   892  
   893  	outA, err := state.AllocByID(alloc.ID)
   894  	if err != nil {
   895  		t.Fatalf("err: %v", err)
   896  	}
   897  	if outA == nil {
   898  		t.Fatalf("bad: %v", outA)
   899  	}
   900  	outA2, err := state.AllocByID(alloc2.ID)
   901  	if err != nil {
   902  		t.Fatalf("err: %v", err)
   903  	}
   904  	if outA2 == nil {
   905  		t.Fatalf("bad: %v", outA2)
   906  	}
   907  }
   908  
   909  func TestCoreScheduler_JobGC_Force(t *testing.T) {
   910  	s1 := testServer(t, nil)
   911  	defer s1.Shutdown()
   912  	testutil.WaitForLeader(t, s1.RPC)
   913  
   914  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   915  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   916  
   917  	// Insert job.
   918  	state := s1.fsm.State()
   919  	job := mock.Job()
   920  	job.Type = structs.JobTypeBatch
   921  	job.Status = structs.JobStatusDead
   922  	err := state.UpsertJob(1000, job)
   923  	if err != nil {
   924  		t.Fatalf("err: %v", err)
   925  	}
   926  
   927  	// Insert a terminal eval
   928  	eval := mock.Eval()
   929  	eval.JobID = job.ID
   930  	eval.Status = structs.EvalStatusComplete
   931  	err = state.UpsertEvals(1001, []*structs.Evaluation{eval})
   932  	if err != nil {
   933  		t.Fatalf("err: %v", err)
   934  	}
   935  
   936  	// Create a core scheduler
   937  	snap, err := state.Snapshot()
   938  	if err != nil {
   939  		t.Fatalf("err: %v", err)
   940  	}
   941  	core := NewCoreScheduler(s1, snap)
   942  
   943  	// Attempt the GC
   944  	gc := s1.coreJobEval(structs.CoreJobForceGC, 1002)
   945  	err = core.Process(gc)
   946  	if err != nil {
   947  		t.Fatalf("err: %v", err)
   948  	}
   949  
   950  	// Shouldn't still exist
   951  	out, err := state.JobByID(job.ID)
   952  	if err != nil {
   953  		t.Fatalf("err: %v", err)
   954  	}
   955  	if out != nil {
   956  		t.Fatalf("bad: %v", out)
   957  	}
   958  
   959  	outE, err := state.EvalByID(eval.ID)
   960  	if err != nil {
   961  		t.Fatalf("err: %v", err)
   962  	}
   963  	if outE != nil {
   964  		t.Fatalf("bad: %v", outE)
   965  	}
   966  }
   967  
   968  func TestCoreScheduler_PartitionReap(t *testing.T) {
   969  	s1 := testServer(t, nil)
   970  	defer s1.Shutdown()
   971  	testutil.WaitForLeader(t, s1.RPC)
   972  
   973  	// COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0
   974  	s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10)
   975  
   976  	// Create a core scheduler
   977  	snap, err := s1.fsm.State().Snapshot()
   978  	if err != nil {
   979  		t.Fatalf("err: %v", err)
   980  	}
   981  	core := NewCoreScheduler(s1, snap)
   982  
   983  	// Set the max ids per reap to something lower.
   984  	maxIdsPerReap = 2
   985  
   986  	evals := []string{"a", "b", "c"}
   987  	allocs := []string{"1", "2", "3"}
   988  	requests := core.(*CoreScheduler).partitionReap(evals, allocs)
   989  	if len(requests) != 3 {
   990  		t.Fatalf("Expected 3 requests got: %v", requests)
   991  	}
   992  
   993  	first := requests[0]
   994  	if len(first.Allocs) != 2 && len(first.Evals) != 0 {
   995  		t.Fatalf("Unexpected first request: %v", first)
   996  	}
   997  
   998  	second := requests[1]
   999  	if len(second.Allocs) != 1 && len(second.Evals) != 1 {
  1000  		t.Fatalf("Unexpected second request: %v", second)
  1001  	}
  1002  
  1003  	third := requests[2]
  1004  	if len(third.Allocs) != 0 && len(third.Evals) != 2 {
  1005  		t.Fatalf("Unexpected third request: %v", third)
  1006  	}
  1007  }