github.com/hernad/nomad@v1.6.112/nomad/volumewatcher/volumes_watcher_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package volumewatcher

import (
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hernad/nomad/ci"
	"github.com/hernad/nomad/helper/testlog"
	"github.com/hernad/nomad/nomad/mock"
	"github.com/hernad/nomad/nomad/state"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
)

// TestVolumeWatch_EnableDisable tests the watcher registration logic that needs
// to happen during leader step-up/step-down
func TestVolumeWatch_EnableDisable(t *testing.T) {
	ci.Parallel(t)

	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete

	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// need to have just enough of a volume and claim in place so that
	// the watcher doesn't immediately stop and unload itself
	claim := &structs.CSIVolumeClaim{
		Mode:  structs.CSIVolumeClaimGC,
		State: structs.CSIVolumeClaimStateNodeDetached,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 1 == len(watcher.watchers)
	}, time.Second, 10*time.Millisecond)

	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	defer watcher.wlock.RUnlock()
	require.Equal(t, 0, len(watcher.watchers))
}

// TestVolumeWatch_LeadershipTransition tests the correct behavior of
// claim reaping across leader step-up/step-down
func TestVolumeWatch_LeadershipTransition(t *testing.T) {
	ci.Parallel(t)

	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusRunning
	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index,
		[]*structs.Allocation{alloc})
	require.NoError(t, err)

	watcher.SetEnabled(true, srv.State(), "")

	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// we should get or start up a watcher when we get an update for
	// the volume from the state store
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 1 == len(watcher.watchers)
	}, time.Second, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 0, "expected to have 0 PastClaims")
	require.Equal(t, srv.countCSIUnpublish, 0, "expected no CSI.Unpublish RPC calls")

	// trying to test a dropped watch is racy, so to reliably simulate
	// this condition, step-down the watcher first and then perform
	// the writes to the volume before starting the new watcher. no
	// watches for that change will fire on the new watcher

	// step-down (this is sync)
	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	require.Equal(t, 0, len(watcher.watchers))
	watcher.wlock.RUnlock()

	// allocation is now invalid
	index++
	err = srv.State().DeleteEval(index, []string{}, []string{alloc.ID}, false)
	require.NoError(t, err)

	// emit a GC so that we have a volume change that's dropped
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimGC,
		State:        structs.CSIVolumeClaimStateUnpublishing,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// create a new watcher and enable it to simulate the leadership
	// transition
	watcher = NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 1, "expected to have 1 PastClaim")
	require.Equal(t, srv.countCSIUnpublish, 1, "expected CSI.Unpublish RPC to be called")
}

// TestVolumeWatch_StartStop tests the start and stop of the watcher when
// it receives notifications and has completed its work
func TestVolumeWatch_StartStop(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc1 := mock.Alloc()
	alloc1.ClientStatus = structs.AllocClientStatusRunning
	alloc2 := mock.Alloc()
	alloc2.Job = alloc1.Job
	alloc2.ClientStatus = structs.AllocClientStatusRunning
	index++
	err := srv.State().UpsertJob(structs.MsgTypeTestSetup, index, nil, alloc1.Job)
	require.NoError(t, err)
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1, alloc2})
	require.NoError(t, err)

	// register a volume
	vol := testVolume(plugin, alloc1, node.ID)
	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second*2, 10*time.Millisecond)

	// claim the volume for both allocs
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimRead,
		AccessMode:   structs.CSIVolumeAccessModeMultiNodeReader,
	}

	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	claim.AllocationID = alloc2.ID
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// reap the volume and assert nothing has happened
	claim = &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	ws := memdb.NewWatchSet()
	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	require.Equal(t, 2, len(vol.ReadAllocs))

	// alloc becomes terminal
	alloc1 = alloc1.Copy()
	alloc1.ClientStatus = structs.AllocClientStatusComplete
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1})
	require.NoError(t, err)
	index++
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// watcher stops and 1 claim has been released
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second*5, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	must.Eq(t, 1, len(vol.ReadAllocs))
	must.Eq(t, 0, len(vol.PastClaims))
}

// TestVolumeWatch_Delete tests the stop of the watcher when it receives
// notifications around a deleted volume
func TestVolumeWatch_Delete(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	must.Eq(t, 0, len(watcher.watchers))

	// register an unused volume
	plugin := mock.CSIPlugin()
	vol := mock.CSIVolume(plugin)
	index++
	must.NoError(t, srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol}))

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second*2, 10*time.Millisecond)

	// write a GC claim to the volume and then immediately delete, to
	// potentially hit the race condition between updates and deletes
	index++
	must.NoError(t, srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID,
		&structs.CSIVolumeClaim{
			Mode:  structs.CSIVolumeClaimGC,
			State: structs.CSIVolumeClaimStateReadyToFree,
		}))

	index++
	must.NoError(t, srv.State().CSIVolumeDeregister(
		index, vol.Namespace, []string{vol.ID}, false))

	// the watcher should not be running
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second*5, 10*time.Millisecond)
}

// TestVolumeWatch_RegisterDeregister tests the start and stop of
// watchers around registration
func TestVolumeWatch_RegisterDeregister(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)

	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 10 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete

	// register a volume without claims
	vol := mock.CSIVolume(plugin)
	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// watcher should stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return 0 == len(watcher.watchers)
	}, time.Second, 10*time.Millisecond)
}
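
// waitForWatcherCount is an illustrative helper, not part of the upstream test
// file. The tests above repeat the same polling pattern: take the watcher's
// read lock inside require.Eventually and compare len(watcher.watchers) with
// an expected count. A helper along these lines could fold that repetition
// into one place. It assumes NewVolumesWatcher returns *Watcher and that the
// unexported wlock and watchers fields keep the shapes used in the tests
// above; adjust the signature if the package differs. For example,
// waitForWatcherCount(t, watcher, 0, 2*time.Second) would stand in for the
// inline require.Eventually blocks that assert the watcher has stopped.
func waitForWatcherCount(t *testing.T, w *Watcher, want int, timeout time.Duration) {
	t.Helper()
	require.Eventually(t, func() bool {
		w.wlock.RLock()
		defer w.wlock.RUnlock()
		return want == len(w.watchers)
	}, timeout, 10*time.Millisecond)
}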