github.com/hernad/nomad@v1.6.112/e2e/scheduler_system/systemsched.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package scheduler_system

import (
	"github.com/hernad/nomad/api"
	"github.com/hernad/nomad/e2e/e2eutil"
	"github.com/hernad/nomad/e2e/framework"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/stretchr/testify/require"
)

type SystemSchedTest struct {
	framework.TC
	jobIDs []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "SystemScheduler",
		CanRunLocal: true,
		Cases: []framework.TestCase{
			new(SystemSchedTest),
		},
	})
}

func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
	// Ensure the cluster has a leader and enough ready nodes before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
}

func (tc *SystemSchedTest) TestJobUpdateOnIneligbleNode(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()

	jobID := "system_deployment"
	tc.jobIDs = append(tc.jobIDs, jobID)
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job0.nomad", jobID, "")

	jobs := nomadClient.Jobs()
	allocs, _, err := jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)
	require.True(t, len(allocs) >= 3)

	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)

	// Wait for allocations to get past the initial pending state
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Mark one node as ineligible
	nodesAPI := tc.Nomad().Nodes()
	disabledNodeID := allocs[0].NodeID
	_, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil)
	require.NoError(t, err)

	// Assert all allocations are still running
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)

	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	allocForDisabledNode := make(map[string]*api.AllocationListStub)

	// Wait for the allocs to run and collect the allocs on the ineligible
	// node. An allocation could have failed, so ensure there is one that is
	// running and that it is the correct version (0).
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
	for _, alloc := range allocs {
		if alloc.NodeID == disabledNodeID {
			allocForDisabledNode[alloc.ID] = alloc
		}
	}

	// Filter down to only our latest running alloc
	for _, alloc := range allocForDisabledNode {
		require.Equal(t, uint64(0), alloc.JobVersion)
		if alloc.ClientStatus == structs.AllocClientStatusComplete {
			// remove the old complete alloc from the map
			delete(allocForDisabledNode, alloc.ID)
		}
	}
	require.NotEmpty(t, allocForDisabledNode)
	require.Len(t, allocForDisabledNode, 1)

	// Update the job
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job1.nomad", jobID, "")

	// Get updated allocations
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	// Wait for allocs to start
	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Get the latest alloc status now that they are no longer pending
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	var foundPreviousAlloc bool
	for _, dAlloc := range allocForDisabledNode {
		for _, alloc := range allocs {
			if alloc.ID == dAlloc.ID {
				foundPreviousAlloc = true
				require.Equal(t, uint64(0), alloc.JobVersion)
			} else if alloc.ClientStatus == structs.AllocClientStatusRunning {
				// Ensure allocs running on the non-disabled nodes are
				// the newer version
				require.Equal(t, uint64(1), alloc.JobVersion)
			}
		}
	}
	require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

func (tc *SystemSchedTest) AfterEach(f *framework.F) {
	nomadClient := tc.Nomad()

	// Mark all nodes eligible again
	nodesAPI := tc.Nomad().Nodes()
	nodes, _, _ := nodesAPI.List(nil)
	for _, node := range nodes {
		nodesAPI.ToggleEligibility(node.ID, true, nil)
	}

	jobs := nomadClient.Jobs()
	// Stop all jobs started by the test
	for _, id := range tc.jobIDs {
		jobs.Deregister(id, true, nil)
	}
	tc.jobIDs = []string{}

	// Garbage collect
	nomadClient.System().GarbageCollect()
}
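
/*
Illustrative only, not part of the suite: a minimal standalone sketch of how a
reader could inspect what this test asserts, using the public Nomad API client
directly. After a node is marked ineligible and the system job is updated, the
alloc pinned to that node should remain at job version 0 while allocs on other
nodes move to version 1. The job ID "system_deployment" comes from the test
above; the Nomad address is taken from the environment (NOMAD_ADDR) via
api.DefaultConfig(), which is an assumption for this sketch.

	package main

	import (
		"fmt"
		"log"

		"github.com/hernad/nomad/api"
	)

	func main() {
		// Build a client from the default config (honors NOMAD_ADDR et al.).
		client, err := api.NewClient(api.DefaultConfig())
		if err != nil {
			log.Fatal(err)
		}

		// List allocations for the system job, including completed ones.
		allocs, _, err := client.Jobs().Allocations("system_deployment", true, nil)
		if err != nil {
			log.Fatal(err)
		}

		// Print each allocation's node, job version, and client status so the
		// old-version alloc left on the ineligible node is easy to spot.
		for _, alloc := range allocs {
			fmt.Printf("alloc=%s node=%s version=%d status=%s\n",
				alloc.ID, alloc.NodeID, alloc.JobVersion, alloc.ClientStatus)
		}
	}
*/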