github.com/hernad/nomad@v1.6.112/nomad/drainer/watch_nodes_test.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package drainer 5 6 import ( 7 "fmt" 8 "testing" 9 "time" 10 11 "github.com/shoenig/test/must" 12 "github.com/shoenig/test/wait" 13 14 "github.com/hernad/nomad/ci" 15 "github.com/hernad/nomad/helper/pointer" 16 "github.com/hernad/nomad/helper/uuid" 17 "github.com/hernad/nomad/nomad/mock" 18 "github.com/hernad/nomad/nomad/state" 19 "github.com/hernad/nomad/nomad/structs" 20 ) 21 22 // TestNodeDrainWatcher_AddNodes tests that new nodes are added to the node 23 // watcher and deadline notifier, but only if they have a drain spec. 24 func TestNodeDrainWatcher_AddNodes(t *testing.T) { 25 ci.Parallel(t) 26 _, store, tracker := testNodeDrainWatcher(t) 27 28 // Create two nodes, one draining and one not draining 29 n1, n2 := mock.Node(), mock.Node() 30 n2.DrainStrategy = &structs.DrainStrategy{ 31 DrainSpec: structs.DrainSpec{ 32 Deadline: time.Hour, 33 }, 34 ForceDeadline: time.Now().Add(time.Hour), 35 } 36 37 // Create a job with a running alloc on each node 38 job := mock.Job() 39 jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID} 40 must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, 101, nil, job)) 41 42 alloc1 := mock.Alloc() 43 alloc1.JobID = job.ID 44 alloc1.Job = job 45 alloc1.TaskGroup = job.TaskGroups[0].Name 46 alloc1.NodeID = n1.ID 47 alloc1.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)} 48 alloc2 := alloc1.Copy() 49 alloc2.ID = uuid.Generate() 50 alloc2.NodeID = n2.ID 51 52 must.NoError(t, store.UpsertAllocs( 53 structs.MsgTypeTestSetup, 102, []*structs.Allocation{alloc1, alloc2})) 54 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 103, n1)) 55 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 104, n2)) 56 57 // Only 1 node is draining, and the other should not be tracked 58 assertTrackerSettled(t, tracker, []string{n2.ID}) 59 60 // Notifications should fire to the job watcher and deadline notifier 61 must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID) 62 must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n2.ID) 63 } 64 65 // TestNodeDrainWatcher_Remove tests that when a node should no longer be 66 // tracked that we stop tracking it in the node watcher and deadline notifier. 67 func TestNodeDrainWatcher_Remove(t *testing.T) { 68 ci.Parallel(t) 69 _, store, tracker := testNodeDrainWatcher(t) 70 71 t.Run("stop drain", func(t *testing.T) { 72 n, _ := testNodeDrainWatcherSetup(t, store, tracker) 73 74 index, _ := store.LatestIndex() 75 must.NoError(t, store.UpdateNodeDrain( 76 structs.MsgTypeTestSetup, index+1, n.ID, nil, false, 0, nil, nil, "")) 77 78 // Node with stopped drain should no longer be tracked 79 assertTrackerSettled(t, tracker, []string{}) 80 must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes) 81 }) 82 83 t.Run("delete node", func(t *testing.T) { 84 n, _ := testNodeDrainWatcherSetup(t, store, tracker) 85 index, _ := store.LatestIndex() 86 index++ 87 must.NoError(t, store.DeleteNode(structs.MsgTypeTestSetup, index, []string{n.ID})) 88 89 // Node with stopped drain should no longer be tracked 90 assertTrackerSettled(t, tracker, []string{}) 91 must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes) 92 }) 93 } 94 95 // TestNodeDrainWatcher_NoRemove tests that when the node status changes to 96 // down/disconnected that we don't remove it from the node watcher or deadline 97 // notifier 98 func TestNodeDrainWatcher_NoRemove(t *testing.T) { 99 ci.Parallel(t) 100 _, store, tracker := testNodeDrainWatcher(t) 101 n, _ := testNodeDrainWatcherSetup(t, store, tracker) 102 103 index, _ := store.LatestIndex() 104 n = n.Copy() 105 n.Status = structs.NodeStatusDisconnected 106 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n)) 107 108 assertTrackerSettled(t, tracker, []string{n.ID}) 109 must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID) 110 111 index, _ = store.LatestIndex() 112 n = n.Copy() 113 n.Status = structs.NodeStatusDown 114 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index+1, n)) 115 116 assertTrackerSettled(t, tracker, []string{n.ID}) 117 must.MapContainsKey(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, n.ID) 118 } 119 120 // TestNodeDrainWatcher_Update_Spec tests drain spec updates emit events to the 121 // node watcher and deadline notifier. 122 func TestNodeDrainWatcher_Update_Spec(t *testing.T) { 123 ci.Parallel(t) 124 _, store, tracker := testNodeDrainWatcher(t) 125 n, _ := testNodeDrainWatcherSetup(t, store, tracker) 126 127 // Update the spec to extend the deadline 128 strategy := n.DrainStrategy.Copy() 129 strategy.DrainSpec.Deadline += time.Hour 130 index, _ := store.LatestIndex() 131 must.NoError(t, store.UpdateNodeDrain( 132 structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(), 133 &structs.NodeEvent{}, map[string]string{}, "", 134 )) 135 136 // We should see a new event 137 assertTrackerSettled(t, tracker, []string{n.ID}) 138 139 // Update the spec to have an infinite deadline 140 strategy = strategy.Copy() 141 strategy.DrainSpec.Deadline = 0 142 143 index, _ = store.LatestIndex() 144 must.NoError(t, store.UpdateNodeDrain( 145 structs.MsgTypeTestSetup, index+1, n.ID, strategy, false, time.Now().Unix(), 146 &structs.NodeEvent{}, map[string]string{}, "", 147 )) 148 149 // We should see a new event and the node should still be tracked but no 150 // longer in the deadline notifier 151 assertTrackerSettled(t, tracker, []string{n.ID}) 152 must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes) 153 } 154 155 // TestNodeDrainWatcher_Update_IsDone tests that a node drain without allocs 156 // immediately gets unmarked as draining, and that we unset drain if an operator 157 // drains a node with nothing on it. 158 func TestNodeDrainWatcher_Update_IsDone(t *testing.T) { 159 ci.Parallel(t) 160 _, store, tracker := testNodeDrainWatcher(t) 161 162 // Create a draining node 163 n := mock.Node() 164 strategy := &structs.DrainStrategy{ 165 DrainSpec: structs.DrainSpec{Deadline: time.Hour}, 166 ForceDeadline: time.Now().Add(time.Hour), 167 } 168 n.DrainStrategy = strategy 169 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 100, n)) 170 171 // There are no jobs on this node so the drain should immediately 172 // complete. we should no longer be tracking the node and its drain strategy 173 // should be cleared 174 assertTrackerSettled(t, tracker, []string{}) 175 must.MapEmpty(t, tracker.jobWatcher.(*MockJobWatcher).jobs) 176 must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes) 177 n, _ = store.NodeByID(nil, n.ID) 178 must.Nil(t, n.DrainStrategy) 179 } 180 181 // TestNodeDrainWatcher_Update_DrainComplete tests that allocation updates that 182 // complete the drain emits events to the node watcher and deadline notifier. 183 func TestNodeDrainWatcher_Update_DrainComplete(t *testing.T) { 184 ci.Parallel(t) 185 _, store, tracker := testNodeDrainWatcher(t) 186 n, _ := testNodeDrainWatcherSetup(t, store, tracker) 187 188 // Simulate event: an alloc is terminal so DrainingJobWatcher.Migrated 189 // channel updates NodeDrainer, which updates Raft 190 _, err := tracker.raft.NodesDrainComplete([]string{n.ID}, 191 structs.NewNodeEvent(). 192 SetSubsystem(structs.NodeEventSubsystemDrain). 193 SetMessage(NodeDrainEventComplete)) 194 must.NoError(t, err) 195 196 assertTrackerSettled(t, tracker, []string{}) 197 198 n, _ = store.NodeByID(nil, n.ID) 199 must.Nil(t, n.DrainStrategy) 200 must.MapEmpty(t, tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes) 201 } 202 203 func testNodeDrainWatcherSetup( 204 t *testing.T, store *state.StateStore, tracker *NodeDrainer) ( 205 *structs.Node, structs.NamespacedID) { 206 207 t.Helper() 208 index, _ := store.LatestIndex() 209 210 // Create a job that will have an alloc on our node 211 job := mock.Job() 212 jobID := structs.NamespacedID{Namespace: job.Namespace, ID: job.ID} 213 index++ 214 must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, job)) 215 216 // Create draining nodes, each with its own alloc for the job running on that node 217 node := mock.Node() 218 node.DrainStrategy = &structs.DrainStrategy{ 219 DrainSpec: structs.DrainSpec{Deadline: time.Hour}, 220 ForceDeadline: time.Now().Add(time.Hour), 221 } 222 223 alloc := mock.Alloc() 224 alloc.JobID = job.ID 225 alloc.Job = job 226 alloc.TaskGroup = job.TaskGroups[0].Name 227 alloc.NodeID = node.ID 228 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: pointer.Of(true)} 229 index++ 230 must.NoError(t, store.UpsertAllocs( 231 structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc})) 232 233 index++ 234 must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) 235 236 // Node should be tracked and notifications should fire to the job watcher 237 // and deadline notifier 238 assertTrackerSettled(t, tracker, []string{node.ID}) 239 must.MapContainsKey(t, tracker.jobWatcher.(*MockJobWatcher).jobs, jobID) 240 must.MapContainsKeys(t, 241 tracker.deadlineNotifier.(*MockDeadlineNotifier).nodes, []string{node.ID}) 242 243 return node, jobID 244 } 245 246 func assertTrackerSettled(t *testing.T, tracker *NodeDrainer, nodeIDs []string) { 247 t.Helper() 248 249 must.Wait(t, wait.InitialSuccess( 250 wait.Timeout(100*time.Millisecond), 251 wait.Gap(time.Millisecond), 252 wait.TestFunc(func() (bool, error) { 253 if len(tracker.TrackedNodes()) != len(nodeIDs) { 254 return false, fmt.Errorf( 255 "expected nodes %v to become marked draining, got %d", 256 nodeIDs, len(tracker.TrackedNodes())) 257 } 258 return true, nil 259 }), 260 )) 261 262 must.Wait(t, wait.ContinualSuccess( 263 wait.Timeout(100*time.Millisecond), 264 wait.Gap(10*time.Millisecond), 265 wait.TestFunc(func() (bool, error) { 266 if len(tracker.TrackedNodes()) != len(nodeIDs) { 267 return false, fmt.Errorf( 268 "expected nodes %v to stay marked draining, got %d", 269 nodeIDs, len(tracker.TrackedNodes())) 270 } 271 return true, nil 272 }), 273 )) 274 275 for _, nodeID := range nodeIDs { 276 must.MapContainsKey(t, tracker.TrackedNodes(), nodeID) 277 } 278 }