github.com/hernad/nomad@v1.6.112/nomad/volumewatcher/volumes_watcher_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package volumewatcher

import (
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hernad/nomad/ci"
	"github.com/hernad/nomad/helper/testlog"
	"github.com/hernad/nomad/nomad/mock"
	"github.com/hernad/nomad/nomad/state"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
)

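// The MockRPCServer and MockStatefulRPCServer fakes, and the testNode and
// testVolume fixtures used throughout, are defined in this package's test
// helper files.
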
// TestVolumeWatch_EnableDisable tests the watcher registration logic that needs
// to happen during leader step-up/step-down
func TestVolumeWatch_EnableDisable(t *testing.T) {
	ci.Parallel(t)

	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

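	// use a short quiescent timeout so that an idle per-volume watcher
	// unloads quickly once its work is done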
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete

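	// build a volume fixture tied to this plugin, alloc, and node
	// (testVolume is a helper defined in this package's tests)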
	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// need to have just enough of a volume and claim in place so that
	// the watcher doesn't immediately stop and unload itself
	claim := &structs.CSIVolumeClaim{
		Mode:  structs.CSIVolumeClaimGC,
		State: structs.CSIVolumeClaimStateNodeDetached,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 1
	}, time.Second, 10*time.Millisecond)

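	// disabling the watcher (leader step-down) must synchronously stop and
	// remove every per-volume watcher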
	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	defer watcher.wlock.RUnlock()
	require.Equal(t, 0, len(watcher.watchers))
}
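
// In the real server this toggling is driven from the leadership loop
// rather than called directly. Roughly (a sketch for orientation only;
// identifiers here are illustrative and the actual call sites live in
// the server's leadership code, not in this package):
//
//	// on leader step-up
//	volumeWatcher.SetEnabled(true, server.State(), leaderACL)
//	// on leader step-down
//	volumeWatcher.SetEnabled(false, nil, "")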

// TestVolumeWatch_LeadershipTransition tests the correct behavior of
// claim reaping across leader step-up/step-down
func TestVolumeWatch_LeadershipTransition(t *testing.T) {
	ci.Parallel(t)

	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusRunning
	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index,
		[]*structs.Allocation{alloc})
	require.NoError(t, err)

	watcher.SetEnabled(true, srv.State(), "")

	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// we should get or start up a watcher when we get an update for
	// the volume from the state store
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 1
	}, time.Second, 10*time.Millisecond)

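	// nothing should have been reaped yet: the alloc is still running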
	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 0, "expected to have 0 PastClaims")
	require.Equal(t, 0, srv.countCSIUnpublish, "expected no CSI.Unpublish RPC calls")

	// trying to test a dropped watch is racy, so to reliably simulate
	// this condition, step down the watcher first and then perform the
	// writes to the volume before starting the new watcher; no watches
	// for that change will fire on the new watcher

	// step-down (this is sync)
	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	require.Equal(t, 0, len(watcher.watchers))
	watcher.wlock.RUnlock()

	// delete the alloc, leaving the volume's claim on it invalid
	index++
	err = srv.State().DeleteEval(index, []string{}, []string{alloc.ID}, false)
	require.NoError(t, err)

	// emit a GC so that we have a volume change that's dropped
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimGC,
		State:        structs.CSIVolumeClaimStateUnpublishing,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// create a new watcher and enable it to simulate the leadership
	// transition
	watcher = NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second, 10*time.Millisecond)

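	// even though no watch fired for the dropped update, the new watcher
	// found the unpublishing claim on its initial scan, reaped it, and
	// then stopped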
	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 1, "expected to have 1 PastClaim")
	require.Equal(t, 1, srv.countCSIUnpublish, "expected CSI.Unpublish RPC to be called")
}

// TestVolumeWatch_StartStop tests the start and stop of the watcher when
// it receives notifications and has completed its work
func TestVolumeWatch_StartStop(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc1 := mock.Alloc()
	alloc1.ClientStatus = structs.AllocClientStatusRunning
	alloc2 := mock.Alloc()
	alloc2.Job = alloc1.Job
	alloc2.ClientStatus = structs.AllocClientStatusRunning
	index++
	err := srv.State().UpsertJob(structs.MsgTypeTestSetup, index, nil, alloc1.Job)
	require.NoError(t, err)
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1, alloc2})
	require.NoError(t, err)

	// register a volume
	vol := testVolume(plugin, alloc1, node.ID)
	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*2, 10*time.Millisecond)

	// claim the volume for both allocs
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimRead,
		AccessMode:   structs.CSIVolumeAccessModeMultiNodeReader,
	}

	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	claim.AllocationID = alloc2.ID
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// reap the volume and assert nothing has happened
	claim = &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

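	// both read claims should still be in place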
	ws := memdb.NewWatchSet()
	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	require.Equal(t, 2, len(vol.ReadAllocs))

	// alloc becomes terminal
	alloc1 = alloc1.Copy()
	alloc1.ClientStatus = structs.AllocClientStatusComplete
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1})
	require.NoError(t, err)
	index++
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// watcher stops and 1 claim has been released
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*5, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	must.Eq(t, 1, len(vol.ReadAllocs))
	must.Eq(t, 0, len(vol.PastClaims))
}

// TestVolumeWatch_Delete tests the stop of the watcher when it receives
// notifications around a deleted volume
func TestVolumeWatch_Delete(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	must.Eq(t, 0, len(watcher.watchers))

	// register an unused volume
	plugin := mock.CSIPlugin()
	vol := mock.CSIVolume(plugin)
	index++
	must.NoError(t, srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol}))

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*2, 10*time.Millisecond)

	// write a GC claim to the volume and then immediately delete, to
	// potentially hit the race condition between updates and deletes
	index++
	must.NoError(t, srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID,
		&structs.CSIVolumeClaim{
			Mode:  structs.CSIVolumeClaimGC,
			State: structs.CSIVolumeClaimStateReadyToFree,
		}))

	index++
	must.NoError(t, srv.State().CSIVolumeDeregister(
		index, vol.Namespace, []string{vol.ID}, false))

	// the watcher should not be running
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*5, 10*time.Millisecond)
}

// TestVolumeWatch_RegisterDeregister tests the start and stop of
// watchers around registration
func TestVolumeWatch_RegisterDeregister(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)

	index := uint64(100)

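	// an even shorter quiescent timeout here: this test only needs to see
	// the unclaimed volume's watcher unload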
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 10 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete
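	// note: this alloc is never claimed against the volume below, so the
	// volume remains unused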

	// register a volume without claims
	vol := mock.CSIVolume(plugin)
	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// watcher should stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second, 10*time.Millisecond)
}