github.com/hernad/nomad@v1.6.112/e2e/scheduler_system/systemsched.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package scheduler_system

import (
	"github.com/hernad/nomad/api"
	"github.com/hernad/nomad/e2e/e2eutil"
	"github.com/hernad/nomad/e2e/framework"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/stretchr/testify/require"
)

type SystemSchedTest struct {
	framework.TC
	jobIDs []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "SystemScheduler",
		CanRunLocal: true,
		Cases: []framework.TestCase{
			new(SystemSchedTest),
		},
	})
}

func (tc *SystemSchedTest) BeforeAll(f *framework.F) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 4)
}

func (tc *SystemSchedTest) TestJobUpdateOnIneligibleNode(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()

	jobID := "system_deployment"
	tc.jobIDs = append(tc.jobIDs, jobID)
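	// Run the system job; the scheduler should place an allocation on every eligible node.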
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job0.nomad", jobID, "")

	jobs := nomadClient.Jobs()
	allocs, _, err := jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)
	require.GreaterOrEqual(t, len(allocs), 3)

	allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs)

	// Wait for allocations to get past initial pending state
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Mark one node as ineligible
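	// Marking a node ineligible stops new placements there but leaves its existing allocation running.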
	nodesAPI := tc.Nomad().Nodes()
	disabledNodeID := allocs[0].NodeID
	_, err = nodesAPI.ToggleEligibility(disabledNodeID, false, nil)
	require.NoError(t, err)

	// Assert the job's allocations are still running
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, true, nil)
	require.NoError(t, err)

	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	allocForDisabledNode := make(map[string]*api.AllocationListStub)

	// Wait for allocs to run and collect the allocs on the ineligible node.
	// An allocation could have failed, so ensure there is one that's running
	// and that it is the correct version (0)
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)
	for _, alloc := range allocs {
		if alloc.NodeID == disabledNodeID {
			allocForDisabledNode[alloc.ID] = alloc
		}
	}

	// Filter down to only our latest running alloc
	for _, alloc := range allocForDisabledNode {
		require.Equal(t, uint64(0), alloc.JobVersion)
		if alloc.ClientStatus == structs.AllocClientStatusComplete {
			// remove any completed allocs from the map
			delete(allocForDisabledNode, alloc.ID)
		}
	}
	require.NotEmpty(t, allocForDisabledNode)
	require.Len(t, allocForDisabledNode, 1)

	// Update the job to a new version
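	// The ineligible node cannot receive the new version, so its alloc should stay at job version 0.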
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "scheduler_system/input/system_job1.nomad", jobID, "")

	// Get updated allocations
	jobs = nomadClient.Jobs()
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

	// Wait for allocs to start
	allocIDs = e2eutil.AllocIDsFromAllocationListStubs(allocs)
	e2eutil.WaitForAllocsNotPending(t, nomadClient, allocIDs)

	// Get the latest alloc status now that they are no longer pending
	allocs, _, err = jobs.Allocations(jobID, false, nil)
	require.NoError(t, err)

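	// Verify the alloc on the ineligible node kept version 0 while running allocs elsewhere moved to version 1.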
	var foundPreviousAlloc bool
	for _, dAlloc := range allocForDisabledNode {
		for _, alloc := range allocs {
			if alloc.ID == dAlloc.ID {
				foundPreviousAlloc = true
				require.Equal(t, uint64(0), alloc.JobVersion)
			} else if alloc.ClientStatus == structs.AllocClientStatusRunning {
				// Ensure allocs running on still-eligible nodes are
				// the newer version
				require.Equal(t, uint64(1), alloc.JobVersion)
			}
		}
	}
	require.True(t, foundPreviousAlloc, "unable to find previous alloc for ineligible node")
}

func (tc *SystemSchedTest) AfterEach(f *framework.F) {
	nomadClient := tc.Nomad()

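	// Cleanup is best-effort: errors from these API calls are deliberately ignored.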
	// Mark all nodes eligible again
	nodesAPI := tc.Nomad().Nodes()
	nodes, _, _ := nodesAPI.List(nil)
	for _, node := range nodes {
		nodesAPI.ToggleEligibility(node.ID, true, nil)
	}

	jobs := nomadClient.Jobs()
	// Stop all jobs registered by the test
	for _, id := range tc.jobIDs {
		jobs.Deregister(id, true, nil)
	}
	tc.jobIDs = []string{}
	// Garbage collect
	nomadClient.System().GarbageCollect()
}