github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/scheduler_sysbatch/sysbatch.go (about) 1 package scheduler_sysbatch 2 3 import ( 4 "strings" 5 "time" 6 7 "github.com/hashicorp/nomad/api" 8 "github.com/hashicorp/nomad/e2e/e2eutil" 9 "github.com/hashicorp/nomad/e2e/framework" 10 "github.com/hashicorp/nomad/nomad/structs" 11 "github.com/stretchr/testify/assert" 12 "github.com/stretchr/testify/require" 13 ) 14 15 type SysBatchSchedulerTest struct { 16 framework.TC 17 jobIDs []string 18 } 19 20 func init() { 21 framework.AddSuites(&framework.TestSuite{ 22 Component: "SysBatchScheduler", 23 CanRunLocal: true, 24 Cases: []framework.TestCase{ 25 new(SysBatchSchedulerTest), 26 }, 27 }) 28 } 29 30 func (tc *SysBatchSchedulerTest) BeforeAll(f *framework.F) { 31 // Ensure cluster has leader before running tests 32 e2eutil.WaitForLeader(f.T(), tc.Nomad()) 33 e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4) 34 } 35 36 func (tc *SysBatchSchedulerTest) TestJobRunBasic(f *framework.F) { 37 t := f.T() 38 nomadClient := tc.Nomad() 39 40 // submit a fast sysbatch job 41 jobID := "sysbatch_run_basic" 42 tc.jobIDs = append(tc.jobIDs, jobID) 43 e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "") 44 45 // get our allocations for this sysbatch job 46 jobs := nomadClient.Jobs() 47 allocs, _, err := jobs.Allocations(jobID, true, nil) 48 require.NoError(t, err) 49 50 // make sure this is job is being run on "all" the linux clients 51 require.True(t, len(allocs) >= 3) 52 53 // wait for every alloc to reach completion 54 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 55 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) 56 } 57 58 func (tc *SysBatchSchedulerTest) TestJobStopEarly(f *framework.F) { 59 t := f.T() 60 nomadClient := tc.Nomad() 61 62 // submit a slow sysbatch job 63 jobID := "sysbatch_stop_early" 64 tc.jobIDs = append(tc.jobIDs, jobID) 65 e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "") 66 67 // get our allocations for this sysbatch job 68 jobs := nomadClient.Jobs() 69 allocs, _, err := jobs.Allocations(jobID, true, nil) 70 require.NoError(t, err) 71 72 // make sure this is job is being run on "all" the linux clients 73 require.True(t, len(allocs) >= 3) 74 75 // wait for every alloc to reach running status 76 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 77 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning) 78 79 // stop the job before allocs reach completion 80 _, _, err = jobs.Deregister(jobID, false, nil) 81 require.NoError(t, err) 82 } 83 84 func (tc *SysBatchSchedulerTest) TestJobReplaceRunning(f *framework.F) { 85 t := f.T() 86 nomadClient := tc.Nomad() 87 88 // submit a slow sysbatch job 89 jobID := "sysbatch_replace_running" 90 tc.jobIDs = append(tc.jobIDs, jobID) 91 e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "") 92 93 // get out allocations for this sysbatch job 94 jobs := nomadClient.Jobs() 95 allocs, _, err := jobs.Allocations(jobID, true, nil) 96 require.NoError(t, err) 97 98 // make sure this is job is being run on "all" the linux clients 99 require.True(t, len(allocs) >= 3) 100 101 // wait for every alloc to reach running status 102 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 103 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusRunning) 104 105 // replace the slow job with the fast job 106 intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "") 107 108 // get the allocs for the new updated job 109 var updated []*api.AllocationListStub 110 for _, alloc := range intermediate { 111 if alloc.JobVersion == 1 { 112 updated = append(updated, alloc) 113 } 114 } 115 116 // should be equal number of old and new allocs 117 newAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated) 118 119 // make sure this new job is being run on "all" the linux clients 120 require.True(t, len(updated) >= 3) 121 122 // wait for the allocs of the fast job to complete 123 e2eutil.WaitForAllocsStatus(t, nomadClient, newAllocIDs, structs.AllocClientStatusComplete) 124 } 125 126 func (tc *SysBatchSchedulerTest) TestJobReplaceDead(f *framework.F) { 127 t := f.T() 128 nomadClient := tc.Nomad() 129 130 // submit a fast sysbatch job 131 jobID := "sysbatch_replace_dead" 132 tc.jobIDs = append(tc.jobIDs, jobID) 133 e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_fast.nomad", jobID, "") 134 135 // get the allocations for this sysbatch job 136 jobs := nomadClient.Jobs() 137 allocs, _, err := jobs.Allocations(jobID, true, nil) 138 require.NoError(t, err) 139 140 // make sure this is job is being run on "all" the linux clients 141 require.True(t, len(allocs) >= 3) 142 143 // wait for every alloc to reach complete status 144 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 145 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) 146 147 // replace the fast job with the slow job 148 intermediate := e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_sysbatch/input/sysbatch_job_slow.nomad", jobID, "") 149 150 // get the allocs for the new updated job 151 var updated []*api.AllocationListStub 152 for _, alloc := range intermediate { 153 if alloc.JobVersion == 1 { 154 updated = append(updated, alloc) 155 } 156 } 157 158 // should be equal number of old and new allocs 159 upAllocIDs := e2eutil.AllocIDsFromAllocationListStubs(updated) 160 161 // make sure this new job is being run on "all" the linux clients 162 require.True(t, len(updated) >= 3) 163 164 // wait for the allocs of the slow job to be running 165 e2eutil.WaitForAllocsStatus(t, nomadClient, upAllocIDs, structs.AllocClientStatusRunning) 166 } 167 168 func (tc *SysBatchSchedulerTest) TestJobRunPeriodic(f *framework.F) { 169 t := f.T() 170 nomadClient := tc.Nomad() 171 172 // submit a fast sysbatch job 173 jobID := "sysbatch_job_periodic" 174 tc.jobIDs = append(tc.jobIDs, jobID) 175 err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_periodic.nomad") 176 require.NoError(t, err) 177 178 // force the cron job to run 179 jobs := nomadClient.Jobs() 180 _, _, err = jobs.PeriodicForce(jobID, nil) 181 require.NoError(t, err) 182 183 // find the cron job that got launched 184 jobsList, _, err := jobs.List(nil) 185 require.NoError(t, err) 186 cronJobID := "" 187 for _, job := range jobsList { 188 if strings.HasPrefix(job.Name, "sysbatch_job_periodic/periodic-") { 189 cronJobID = job.Name 190 break 191 } 192 } 193 require.NotEmpty(t, cronJobID) 194 tc.jobIDs = append(tc.jobIDs, cronJobID) 195 196 // wait for allocs of the cron job 197 var allocs []*api.AllocationListStub 198 require.True(t, assert.Eventually(t, func() bool { 199 var err error 200 allocs, _, err = jobs.Allocations(cronJobID, false, nil) 201 require.NoError(t, err) 202 return len(allocs) >= 3 203 }, 30*time.Second, time.Second)) 204 205 // wait for every cron job alloc to reach completion 206 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 207 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) 208 } 209 210 func (tc *SysBatchSchedulerTest) TestJobRunDispatch(f *framework.F) { 211 t := f.T() 212 nomadClient := tc.Nomad() 213 214 // submit a fast sysbatch dispatch job 215 jobID := "sysbatch_job_dispatch" 216 tc.jobIDs = append(tc.jobIDs, jobID) 217 err := e2eutil.Register(jobID, "scheduler_sysbatch/input/sysbatch_dispatch.nomad") 218 require.NoError(t, err) 219 220 // dispatch the sysbatch job 221 jobs := nomadClient.Jobs() 222 result, _, err := jobs.Dispatch(jobID, map[string]string{ 223 "KEY": "value", 224 }, nil, "", nil) 225 require.NoError(t, err) 226 227 // grab the new dispatched jobID 228 dispatchID := result.DispatchedJobID 229 tc.jobIDs = append(tc.jobIDs, dispatchID) 230 231 // wait for allocs of the dispatched job 232 var allocs []*api.AllocationListStub 233 require.True(t, assert.Eventually(t, func() bool { 234 var err error 235 allocs, _, err = jobs.Allocations(dispatchID, false, nil) 236 require.NoError(t, err) 237 return len(allocs) >= 3 238 }, 30*time.Second, time.Second)) 239 240 // wait for every dispatch alloc to reach completion 241 allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) 242 e2eutil.WaitForAllocsStatus(t, nomadClient, allocIDs, structs.AllocClientStatusComplete) 243 } 244 245 func (tc *SysBatchSchedulerTest) AfterEach(f *framework.F) { 246 nomadClient := tc.Nomad() 247 248 // Mark all nodes eligible 249 nodesAPI := tc.Nomad().Nodes() 250 nodes, _, _ := nodesAPI.List(nil) 251 for _, node := range nodes { 252 _, _ = nodesAPI.ToggleEligibility(node.ID, true, nil) 253 } 254 255 jobs := nomadClient.Jobs() 256 257 // Stop all jobs in test 258 for _, id := range tc.jobIDs { 259 _, _, _ = jobs.Deregister(id, true, nil) 260 } 261 tc.jobIDs = []string{} 262 263 // Garbage collect 264 _ = nomadClient.System().GarbageCollect() 265 }