github.com/hernad/nomad@v1.6.112/nomad/drainer/watch_nodes_test.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package drainer
     5  
     6  import (
     7  	"fmt"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/shoenig/test/must"
    12  	"github.com/shoenig/test/wait"
    13  
    14  	"github.com/hernad/nomad/ci"
    15  	"github.com/hernad/nomad/helper/pointer"
    16  	"github.com/hernad/nomad/helper/uuid"
    17  	"github.com/hernad/nomad/nomad/mock"
    18  	"github.com/hernad/nomad/nomad/state"
    19  	"github.com/hernad/nomad/nomad/structs"
    20  )
    21  
    22  // TestNodeDrainWatcher_AddNodes tests that new nodes are added to the node
    23  // watcher and deadline notifier, but only if they have a drain spec.
    24  func TestNodeDrainWatcher_AddNodes(t *testing.T) {
    25  	ci.Parallel(t)
    26  	_, store, tracker := testNodeDrainWatcher(t)
    27  
    28  	// Create two nodes, one draining and one not draining
    29  	n1, n2 := mock.Node(), mock.Node()
    30  	n2.DrainStrategy = &structs.DrainStrategy{
    31  		DrainSpec: structs.DrainSpec{
    32  			Deadline: time.Hour,
    33  		},
    34  		ForceDeadline: time.Now().Add(time.Hour),
    35  	}
    36  
    37  	// Create a job with a running alloc on each node
    38  	job := mock.Job()
    39  	jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
    40  	must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job))
    41  
    42  	alloc1 := mock.Alloc()
    43  	alloc1.JobID = job.ID
    44  	alloc1.Job = job
    45  	alloc1.TaskGroup = job.TaskGroups[0].Name
    46  	alloc1.NodeID = n1.ID
    47  	alloc1.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
    48  	alloc2 := alloc1.Copy()
    49  	alloc2.ID = uuid.Generate()
    50  	alloc2.NodeID = n2.ID
    51  
    52  	must.NoError(t, store.UpsertAllocs(
    53  		structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1, alloc2}))
    54  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 103, n1))
    55  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 104, n2))
    56  
    57  	// Only 1 node is draining, and the other should not be tracked
    58  	assertTrackerSettled(t, tracker, []string{n2.ID})
    59  
    60  	// Notifications should fire to the job watcher and deadline notifier
    61  	must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID)
    62  	must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n2.ID)
    63  }
    64  
    65  // TestNodeDrainWatcher_Remove tests that when a node should no longer be
    66  // tracked that we stop tracking it in the node watcher and deadline notifier.
    67  func TestNodeDrainWatcher_Remove(t *testing.T) {
    68  	ci.Parallel(t)
    69  	_, store, tracker := testNodeDrainWatcher(t)
    70  
    71  	t.Run("stop drain", func(t *testing.T) {
    72  		n, _ := testNodeDrainWatcherSetup(t, store, tracker)
    73  
    74  		index, _ := store.LatestIndex()
    75  		must.NoError(t, store.UpdateNodeDrain(
    76  			structs.MsgTypeTestSetup, index+1, n.ID, nil, false, 0, nil, nil, ""))
    77  
    78  		// Node with stopped drain should no longer be tracked
    79  		assertTrackerSettled(t, tracker, []string{})
    80  		must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
    81  	})
    82  
    83  	t.Run("delete node", func(t *testing.T) {
    84  		n, _ := testNodeDrainWatcherSetup(t, store, tracker)
    85  		index, _ := store.LatestIndex()
    86  		index++
    87  		must.NoError(t, store.DeleteNode(structs.MsgTypeTestSetup, index, []string{n.ID}))
    88  
    89  		// Node with stopped drain should no longer be tracked
    90  		assertTrackerSettled(t, tracker, []string{})
    91  		must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
    92  	})
    93  }
    94  
    95  // TestNodeDrainWatcher_NoRemove tests that when the node status changes to
    96  // down/disconnected that we don't remove it from the node watcher or deadline
    97  // notifier
    98  func TestNodeDrainWatcher_NoRemove(t *testing.T) {
    99  	ci.Parallel(t)
   100  	_, store, tracker := testNodeDrainWatcher(t)
   101  	n, _ := testNodeDrainWatcherSetup(t, store, tracker)
   102  
   103  	index, _ := store.LatestIndex()
   104  	n = n.Copy()
   105  	n.Status = structs.NodeStatusDisconnected
   106  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n))
   107  
   108  	assertTrackerSettled(t, tracker, []string{n.ID})
   109  	must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID)
   110  
   111  	index, _ = store.LatestIndex()
   112  	n = n.Copy()
   113  	n.Status = structs.NodeStatusDown
   114  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n))
   115  
   116  	assertTrackerSettled(t, tracker, []string{n.ID})
   117  	must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID)
   118  }
   119  
   120  // TestNodeDrainWatcher_Update_Spec tests drain spec updates emit events to the
   121  // node watcher and deadline notifier.
   122  func TestNodeDrainWatcher_Update_Spec(t *testing.T) {
   123  	ci.Parallel(t)
   124  	_, store, tracker := testNodeDrainWatcher(t)
   125  	n, _ := testNodeDrainWatcherSetup(t, store, tracker)
   126  
   127  	// Update the spec to extend the deadline
   128  	strategy := n.DrainStrategy.Copy()
   129  	strategy.DrainSpec.Deadline += time.Hour
   130  	index, _ := store.LatestIndex()
   131  	must.NoError(t, store.UpdateNodeDrain(
   132  		structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(),
   133  		&structs.NodeEvent{}, map[string]string{}, "",
   134  	))
   135  
   136  	// We should see a new event
   137  	assertTrackerSettled(t, tracker, []string{n.ID})
   138  
   139  	// Update the spec to have an infinite deadline
   140  	strategy = strategy.Copy()
   141  	strategy.DrainSpec.Deadline = 0
   142  
   143  	index, _ = store.LatestIndex()
   144  	must.NoError(t, store.UpdateNodeDrain(
   145  		structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(),
   146  		&structs.NodeEvent{}, map[string]string{}, "",
   147  	))
   148  
   149  	// We should see a new event and the node should still be tracked but no
   150  	// longer in the deadline notifier
   151  	assertTrackerSettled(t, tracker, []string{n.ID})
   152  	must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
   153  }
   154  
   155  // TestNodeDrainWatcher_Update_IsDone tests that a node drain without allocs
   156  // immediately gets unmarked as draining, and that we unset drain if an operator
   157  // drains a node with nothing on it.
   158  func TestNodeDrainWatcher_Update_IsDone(t *testing.T) {
   159  	ci.Parallel(t)
   160  	_, store, tracker := testNodeDrainWatcher(t)
   161  
   162  	// Create a draining node
   163  	n := mock.Node()
   164  	strategy := &structs.DrainStrategy{
   165  		DrainSpec:     structs.DrainSpec{Deadline: time.Hour},
   166  		ForceDeadline: time.Now().Add(time.Hour),
   167  	}
   168  	n.DrainStrategy = strategy
   169  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 100, n))
   170  
   171  	// There are no jobs on this node so the drain should immediately
   172  	// complete. we should no longer be tracking the node and its drain strategy
   173  	// should be cleared
   174  	assertTrackerSettled(t, tracker, []string{})
   175  	must.MapEmpty(t, tracker.jobWatcher.(*MockJobWatcher).jobs)
   176  	must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
   177  	n, _ = store.NodeByID(nil, n.ID)
   178  	must.Nil(t, n.DrainStrategy)
   179  }
   180  
   181  // TestNodeDrainWatcher_Update_DrainComplete tests that allocation updates that
   182  // complete the drain emits events to the node watcher and deadline notifier.
   183  func TestNodeDrainWatcher_Update_DrainComplete(t *testing.T) {
   184  	ci.Parallel(t)
   185  	_, store, tracker := testNodeDrainWatcher(t)
   186  	n, _ := testNodeDrainWatcherSetup(t, store, tracker)
   187  
   188  	// Simulate event: an alloc is terminal so DrainingJobWatcher.Migrated
   189  	// channel updates NodeDrainer, which updates Raft
   190  	_, err := tracker.raft.NodesDrainComplete([]string{n.ID},
   191  		structs.NewNodeEvent().
   192  			SetSubsystem(structs.NodeEventSubsystemDrain).
   193  			SetMessage(NodeDrainEventComplete))
   194  	must.NoError(t, err)
   195  
   196  	assertTrackerSettled(t, tracker, []string{})
   197  
   198  	n, _ = store.NodeByID(nil, n.ID)
   199  	must.Nil(t, n.DrainStrategy)
   200  	must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes)
   201  }
   202  
   203  func testNodeDrainWatcherSetup(
   204  	t *testing.T, store *state.StateStore, tracker *NodeDrainer) (
   205  	*structs.Node, structs.NamespacedID) {
   206  
   207  	t.Helper()
   208  	index, _ := store.LatestIndex()
   209  
   210  	// Create a job that will have an alloc on our node
   211  	job := mock.Job()
   212  	jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID}
   213  	index++
   214  	must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job))
   215  
   216  	// Create draining nodes, each with its own alloc for the job running on that node
   217  	node := mock.Node()
   218  	node.DrainStrategy = &structs.DrainStrategy{
   219  		DrainSpec:     structs.DrainSpec{Deadline: time.Hour},
   220  		ForceDeadline: time.Now().Add(time.Hour),
   221  	}
   222  
   223  	alloc := mock.Alloc()
   224  	alloc.JobID = job.ID
   225  	alloc.Job = job
   226  	alloc.TaskGroup = job.TaskGroups[0].Name
   227  	alloc.NodeID = node.ID
   228  	alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)}
   229  	index++
   230  	must.NoError(t, store.UpsertAllocs(
   231  		structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc}))
   232  
   233  	index++
   234  	must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node))
   235  
   236  	// Node should be tracked and notifications should fire to the job watcher
   237  	// and deadline notifier
   238  	assertTrackerSettled(t, tracker, []string{node.ID})
   239  	must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID)
   240  	must.MapContainsKeys(t,
   241  		tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, []string{node.ID})
   242  
   243  	return node, jobID
   244  }
   245  
   246  func assertTrackerSettled(t *testing.T, tracker *NodeDrainer, nodeIDs []string) {
   247  	t.Helper()
   248  
   249  	must.Wait(t, wait.InitialSuccess(
   250  		wait.Timeout(100*time.Millisecond),
   251  		wait.Gap(time.Millisecond),
   252  		wait.TestFunc(func() (bool, error) {
   253  			if len(tracker.TrackedNodes()) != len(nodeIDs) {
   254  				return false, fmt.Errorf(
   255  					"expected nodes %v to become marked draining, got %d",
   256  					nodeIDs, len(tracker.TrackedNodes()))
   257  			}
   258  			return true, nil
   259  		}),
   260  	))
   261  
   262  	must.Wait(t, wait.ContinualSuccess(
   263  		wait.Timeout(100*time.Millisecond),
   264  		wait.Gap(10*time.Millisecond),
   265  		wait.TestFunc(func() (bool, error) {
   266  			if len(tracker.TrackedNodes()) != len(nodeIDs) {
   267  				return false, fmt.Errorf(
   268  					"expected nodes %v to stay marked draining, got %d",
   269  					nodeIDs, len(tracker.TrackedNodes()))
   270  			}
   271  			return true, nil
   272  		}),
   273  	))
   274  
   275  	for _, nodeID := range nodeIDs {
   276  		must.MapContainsKey(t, tracker.TrackedNodes(), nodeID)
   277  	}
   278  }