github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/scheduler_sysbatch/sysbatch.go (about)

     1  package scheduler_sysbatch
     2  
     3  import (
     4  	"strings"
     5  	"time"
     6  
     7  	"github.com/hashicorp/nomad/api"
     8  	"github.com/hashicorp/nomad/e2e/e2eutil"
     9  	"github.com/hashicorp/nomad/e2e/framework"
    10  	"github.com/hashicorp/nomad/nomad/structs"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/require"
    13  )
    14  
    15  type SysBatchSchedulerTest struct {
    16  	framework.TC
    17  	jobIDs []string
    18  }
    19  
    20  func init() {
    21  	framework.AddSuites(&framework.TestSuite{
    22  		Component:   "SysBatchScheduler",
    23  		CanRunLocal: true,
    24  		Cases: []framework.TestCase{
    25  			new(SysBatchSchedulerTest),
    26  		},
    27  	})
    28  }
    29  
    30  func (tc *SysBatchSchedulerTest) BeforeAll(f *framework.F) {
    31  	// Ensure cluster has leader before running tests
    32  	e2eutil.WaitForLeader(f.T(), tc.Nomad())
    33  	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
    34  }
    35  
    36  func (tc *SysBatchSchedulerTest) TestJobRunBasic(f *framework.F) {
    37  	t := f.T()
    38  	nomadClient := tc.Nomad()
    39  
    40  	// submit a fast sysbatch job
    41  	jobID := "sysbatch_run_basic"
    42  	tc.jobIDs = append(tc.jobIDs, jobID)
    43  	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
    44  
    45  	// get our allocations for this sysbatch job
    46  	jobs := nomadClient.Jobs()
    47  	allocs, _, err := jobs.Allocations(jobID, true, nil)
    48  	require.NoError(t, err)
    49  
    50  	// make sure this is job is being run on "all" the linux clients
    51  	require.True(t, len(allocs) >= 3)
    52  
    53  	// wait for every alloc to reach completion
    54  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
    55  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
    56  }
    57  
    58  func (tc *SysBatchSchedulerTest) TestJobStopEarly(f *framework.F) {
    59  	t := f.T()
    60  	nomadClient := tc.Nomad()
    61  
    62  	// submit a slow sysbatch job
    63  	jobID := "sysbatch_stop_early"
    64  	tc.jobIDs = append(tc.jobIDs, jobID)
    65  	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
    66  
    67  	// get our allocations for this sysbatch job
    68  	jobs := nomadClient.Jobs()
    69  	allocs, _, err := jobs.Allocations(jobID, true, nil)
    70  	require.NoError(t, err)
    71  
    72  	// make sure this is job is being run on "all" the linux clients
    73  	require.True(t, len(allocs) >= 3)
    74  
    75  	// wait for every alloc to reach running status
    76  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
    77  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning)
    78  
    79  	// stop the job before allocs reach completion
    80  	_, _, err = jobs.Deregister(jobID, false, nil)
    81  	require.NoError(t, err)
    82  }
    83  
    84  func (tc *SysBatchSchedulerTest) TestJobReplaceRunning(f *framework.F) {
    85  	t := f.T()
    86  	nomadClient := tc.Nomad()
    87  
    88  	// submit a slow sysbatch job
    89  	jobID := "sysbatch_replace_running"
    90  	tc.jobIDs = append(tc.jobIDs, jobID)
    91  	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
    92  
    93  	// get out allocations for this sysbatch job
    94  	jobs := nomadClient.Jobs()
    95  	allocs, _, err := jobs.Allocations(jobID, true, nil)
    96  	require.NoError(t, err)
    97  
    98  	// make sure this is job is being run on "all" the linux clients
    99  	require.True(t, len(allocs) >= 3)
   100  
   101  	// wait for every alloc to reach running status
   102  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
   103  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning)
   104  
   105  	// replace the slow job with the fast job
   106  	intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
   107  
   108  	// get the allocs for the new updated job
   109  	var updated []*api.AllocationListStub
   110  	for _, alloc := range intermediate {
   111  		if alloc.JobVersion == 1 {
   112  			updated = append(updated, alloc)
   113  		}
   114  	}
   115  
   116  	// should be equal number of old and new allocs
   117  	newAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated)
   118  
   119  	// make sure this new job is being run on "all" the linux clients
   120  	require.True(t, len(updated) >= 3)
   121  
   122  	// wait for the allocs of the fast job to complete
   123  	e2eutil.WaitForAllocsStatus(t, nomadClient, newAllocIDs, structs.AllocClientStatusComplete)
   124  }
   125  
   126  func (tc *SysBatchSchedulerTest) TestJobReplaceDead(f *framework.F) {
   127  	t := f.T()
   128  	nomadClient := tc.Nomad()
   129  
   130  	// submit a fast sysbatch job
   131  	jobID := "sysbatch_replace_dead"
   132  	tc.jobIDs = append(tc.jobIDs, jobID)
   133  	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "")
   134  
   135  	// get the allocations for this sysbatch job
   136  	jobs := nomadClient.Jobs()
   137  	allocs, _, err := jobs.Allocations(jobID, true, nil)
   138  	require.NoError(t, err)
   139  
   140  	// make sure this is job is being run on "all" the linux clients
   141  	require.True(t, len(allocs) >= 3)
   142  
   143  	// wait for every alloc to reach complete status
   144  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
   145  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
   146  
   147  	// replace the fast job with the slow job
   148  	intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "")
   149  
   150  	// get the allocs for the new updated job
   151  	var updated []*api.AllocationListStub
   152  	for _, alloc := range intermediate {
   153  		if alloc.JobVersion == 1 {
   154  			updated = append(updated, alloc)
   155  		}
   156  	}
   157  
   158  	// should be equal number of old and new allocs
   159  	upAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated)
   160  
   161  	// make sure this new job is being run on "all" the linux clients
   162  	require.True(t, len(updated) >= 3)
   163  
   164  	// wait for the allocs of the slow job to be running
   165  	e2eutil.WaitForAllocsStatus(t, nomadClient, upAllocIDs, structs.AllocClientStatusRunning)
   166  }
   167  
   168  func (tc *SysBatchSchedulerTest) TestJobRunPeriodic(f *framework.F) {
   169  	t := f.T()
   170  	nomadClient := tc.Nomad()
   171  
   172  	// submit a fast sysbatch job
   173  	jobID := "sysbatch_job_periodic"
   174  	tc.jobIDs = append(tc.jobIDs, jobID)
   175  	err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_periodic.nomad")
   176  	require.NoError(t, err)
   177  
   178  	// force the cron job to run
   179  	jobs := nomadClient.Jobs()
   180  	_, _, err = jobs.PeriodicForce(jobID, nil)
   181  	require.NoError(t, err)
   182  
   183  	// find the cron job that got launched
   184  	jobsList, _, err := jobs.List(nil)
   185  	require.NoError(t, err)
   186  	cronJobID := ""
   187  	for _, job := range jobsList {
   188  		if strings.HasPrefix(job.Name, "sysbatch_job_periodic/periodic-") {
   189  			cronJobID = job.Name
   190  			break
   191  		}
   192  	}
   193  	require.NotEmpty(t, cronJobID)
   194  	tc.jobIDs = append(tc.jobIDs, cronJobID)
   195  
   196  	// wait for allocs of the cron job
   197  	var allocs []*api.AllocationListStub
   198  	require.True(t, assert.Eventually(t, func() bool {
   199  		var err error
   200  		allocs, _, err = jobs.Allocations(cronJobID, false, nil)
   201  		require.NoError(t, err)
   202  		return len(allocs) >= 3
   203  	}, 30*time.Second, time.Second))
   204  
   205  	// wait for every cron job alloc to reach completion
   206  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
   207  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
   208  }
   209  
   210  func (tc *SysBatchSchedulerTest) TestJobRunDispatch(f *framework.F) {
   211  	t := f.T()
   212  	nomadClient := tc.Nomad()
   213  
   214  	// submit a fast sysbatch dispatch job
   215  	jobID := "sysbatch_job_dispatch"
   216  	tc.jobIDs = append(tc.jobIDs, jobID)
   217  	err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_dispatch.nomad")
   218  	require.NoError(t, err)
   219  
   220  	// dispatch the sysbatch job
   221  	jobs := nomadClient.Jobs()
   222  	result, _, err := jobs.Dispatch(jobID, map[string]string{
   223  		"KEY": "value",
   224  	}, nil, "", nil)
   225  	require.NoError(t, err)
   226  
   227  	// grab the new dispatched jobID
   228  	dispatchID := result.DispatchedJobID
   229  	tc.jobIDs = append(tc.jobIDs, dispatchID)
   230  
   231  	// wait for allocs of the dispatched job
   232  	var allocs []*api.AllocationListStub
   233  	require.True(t, assert.Eventually(t, func() bool {
   234  		var err error
   235  		allocs, _, err = jobs.Allocations(dispatchID, false, nil)
   236  		require.NoError(t, err)
   237  		return len(allocs) >= 3
   238  	}, 30*time.Second, time.Second))
   239  
   240  	// wait for every dispatch alloc to reach completion
   241  	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)
   242  	e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete)
   243  }
   244  
   245  func (tc *SysBatchSchedulerTest) AfterEach(f *framework.F) {
   246  	nomadClient := tc.Nomad()
   247  
   248  	// Mark all nodes eligible
   249  	nodesAPI := tc.Nomad().Nodes()
   250  	nodes, _, _ := nodesAPI.List(nil)
   251  	for _, node := range nodes {
   252  		_, _ = nodesAPI.ToggleEligibility(node.ID, true, nil)
   253  	}
   254  
   255  	jobs := nomadClient.Jobs()
   256  
   257  	// Stop all jobs in test
   258  	for _, id := range tc.jobIDs {
   259  		_, _, _ = jobs.Deregister(id, true, nil)
   260  	}
   261  	tc.jobIDs = []string{}
   262  
   263  	// Garbage collect
   264  	_ = nomadClient.System().GarbageCollect()
   265  }