github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/scheduler_system/systemsched.go

package scheduler_system

import (
	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/require"
)

type SystemSchedTest struct {
	framework.TC
	jobIDs []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "SystemScheduler",
		CanRunLocal: true,
		Cases: []framework.TestCase{
			new(SystemSchedTest),
		},
	})
}

func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
	// Ensure cluster has a leader and enough ready nodes before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
}

// TestJobUpdateOnIneligbleNode registers a system job, marks one of its nodes
// as ineligible, then updates the job and verifies that the alloc on the
// ineligible node keeps running the original job version (0) while allocs on
// eligible nodes run the updated version (1).
func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()

	jobID := "system_deployment"
	tc.jobIDs = append(tc.jobIDs, jobID)
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job0.nomad", jobID, "")

	jobs := nomadClient.Jobs()
	allocs, _, err := jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)
	require.True(t, len(allocs) >= 3)

	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)

	// Wait for allocations to get past the initial pending state
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Mark one node as ineligible
	nodesAPI := tc.Nomad().Nodes()
	disabledNodeID := allocs[0].NodeID
	_, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil)
	require.NoError(t, err)

	// Assert all jobs are still running
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)

	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	allocForDisabledNode := make(map[string]*api.AllocationListStub)

	// Wait for allocs to run and collect the allocs on the ineligible node.
	// An allocation could have failed, so ensure there is one that's running
	// and that it is the correct version (0).
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
	for _, alloc := range allocs {
		if alloc.NodeID == disabledNodeID {
			allocForDisabledNode[alloc.ID] = alloc
		}
	}

	// Filter down to only our latest running alloc
	for _, alloc := range allocForDisabledNode {
		require.Equal(t, uint64(0), alloc.JobVersion)
		if alloc.ClientStatus == structs.AllocClientStatusComplete {
			// Remove the old, completed alloc from the map
			delete(allocForDisabledNode, alloc.ID)
		}
	}
	require.NotEmpty(t, allocForDisabledNode)
	require.Len(t, allocForDisabledNode, 1)

	// Update the job
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job1.nomad", jobID, "")

	// Get updated allocations
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	// Wait for allocs to start
	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Get the latest alloc status now that they are no longer pending
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	var foundPreviousAlloc bool
	for _, dAlloc := range allocForDisabledNode {
		for _, alloc := range allocs {
			if alloc.ID == dAlloc.ID {
				foundPreviousAlloc = true
				require.Equal(t, uint64(0), alloc.JobVersion)
			} else if alloc.ClientStatus == structs.AllocClientStatusRunning {
				// Ensure allocs running on non-disabled nodes are
				// the newer version
				require.Equal(t, uint64(1), alloc.JobVersion)
			}
		}
	}
	require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

// AfterEach restores node eligibility, stops the jobs registered by the test,
// and triggers garbage collection.
func (tc *SystemSchedTest) AfterEach(f *framework.F) {
	nomadClient := tc.Nomad()

	// Mark all nodes eligible
	nodesAPI := tc.Nomad().Nodes()
	nodes, _, _ := nodesAPI.List(nil)
	for _, node := range nodes {
		nodesAPI.ToggleEligibility(node.ID, true, nil)
	}

	jobs := nomadClient.Jobs()
	// Stop all jobs registered by the test
	for _, id := range tc.jobIDs {
		jobs.Deregister(id, true, nil)
	}
	tc.jobIDs = []string{}
	// Garbage collect
	nomadClient.System().GarbageCollect()
}