github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/volumewatcher/volumes_watcher_test.go

package volumewatcher

import (
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
)

// TestVolumeWatch_EnableDisable tests the watcher registration logic that needs
// to happen during leader step-up/step-down
func TestVolumeWatch_EnableDisable(t *testing.T) {
	ci.Parallel(t)

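	// MockRPCServer is a test double defined alongside these tests; it's
	// assumed to count the watcher's RPC calls (e.g. countCSIUnpublish)
	// without applying them back to the state store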
	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
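	// shorten how long an idle volume watcher waits before unloading
	// itself, so the test completes quickly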
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete

	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// need to have just enough of a volume and claim in place so that
	// the watcher doesn't immediately stop and unload itself
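	// (NodeDetached is not a terminal claim state, so the watcher treats
	// the volume as still having outstanding work)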
	claim := &structs.CSIVolumeClaim{
		Mode:  structs.CSIVolumeClaimGC,
		State: structs.CSIVolumeClaimStateNodeDetached,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 1
	}, time.Second, 10*time.Millisecond)

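	// step-down: disabling the watcher unloads all volume watchers
	// synchronously, so we can assert on the map immediately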
	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	defer watcher.wlock.RUnlock()
	require.Equal(t, 0, len(watcher.watchers))
}

// TestVolumeWatch_LeadershipTransition tests the correct behavior of
// claim reaping across leader step-up/step-down
func TestVolumeWatch_LeadershipTransition(t *testing.T) {
	ci.Parallel(t)

	srv := &MockRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusRunning
	vol := testVolume(plugin, alloc, node.ID)

	index++
	err := srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index,
		[]*structs.Allocation{alloc})
	require.NoError(t, err)

	watcher.SetEnabled(true, srv.State(), "")

	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// we should get or start up a watcher when we get an update for
	// the volume from the state store
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 1
	}, time.Second, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 0, "expected to have 0 PastClaims")
	require.Equal(t, 0, srv.countCSIUnpublish, "expected no CSI.Unpublish RPC calls")

	// trying to test a dropped watch is racy, so to reliably simulate
	// this condition, step-down the watcher first and then perform
	// the writes to the volume before starting the new watcher. no
	// watches for that change will fire on the new watcher

	// step-down (this is sync)
	watcher.SetEnabled(false, nil, "")
	watcher.wlock.RLock()
	require.Equal(t, 0, len(watcher.watchers))
	watcher.wlock.RUnlock()

	// allocation is now invalid
	index++
	err = srv.State().DeleteEval(index, []string{}, []string{alloc.ID}, false)
	require.NoError(t, err)

	// emit a GC so that we have a volume change that's dropped
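	// (the Unpublishing state leaves the unpublish workflow for the next
	// enabled watcher to pick up, which the assertions below verify)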
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimGC,
		State:        structs.CSIVolumeClaimStateUnpublishing,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// create a new watcher and enable it to simulate the leadership
	// transition
	watcher = NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond
	watcher.SetEnabled(true, srv.State(), "")

	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(nil, vol.Namespace, vol.ID)
	require.Len(t, vol.PastClaims, 1, "expected to have 1 PastClaim")
	require.Equal(t, 1, srv.countCSIUnpublish, "expected CSI.Unpublish RPC to be called")
}

// TestVolumeWatch_StartStop tests the start and stop of the watcher when
// it receives notifications and has completed its work
func TestVolumeWatch_StartStop(t *testing.T) {
	ci.Parallel(t)

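	// MockStatefulRPCServer is assumed to apply the watcher's RPCs back to
	// the state store, so released claims really disappear from the volume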
	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	node := testNode(plugin, srv.State())
	alloc1 := mock.Alloc()
	alloc1.ClientStatus = structs.AllocClientStatusRunning
	alloc2 := mock.Alloc()
	alloc2.Job = alloc1.Job
	alloc2.ClientStatus = structs.AllocClientStatusRunning
	index++
	err := srv.State().UpsertJob(structs.MsgTypeTestSetup, index, alloc1.Job)
	require.NoError(t, err)
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1, alloc2})
	require.NoError(t, err)

	// register a volume
	vol := testVolume(plugin, alloc1, node.ID)
	index++
	err = srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*2, 10*time.Millisecond)

	// claim the volume for both allocs
	claim := &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
		Mode:         structs.CSIVolumeClaimRead,
		AccessMode:   structs.CSIVolumeAccessModeMultiNodeReader,
	}

	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)
	claim.AllocationID = alloc2.ID
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// reap the volume and assert nothing has happened
	claim = &structs.CSIVolumeClaim{
		AllocationID: alloc1.ID,
		NodeID:       node.ID,
	}
	index++
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	ws := memdb.NewWatchSet()
	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	require.Equal(t, 2, len(vol.ReadAllocs))

	// alloc becomes terminal
	alloc1 = alloc1.Copy()
	alloc1.ClientStatus = structs.AllocClientStatusComplete
	index++
	err = srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1})
	require.NoError(t, err)
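	// marking the claim ReadyToFree lets the watcher release it now that
	// the alloc is terminal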
	index++
	claim.State = structs.CSIVolumeClaimStateReadyToFree
	err = srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID, claim)
	require.NoError(t, err)

	// watcher stops and 1 claim has been released
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*5, 10*time.Millisecond)

	vol, _ = srv.State().CSIVolumeByID(ws, vol.Namespace, vol.ID)
	must.Eq(t, 1, len(vol.ReadAllocs))
	must.Eq(t, 0, len(vol.PastClaims))
}

// TestVolumeWatch_Delete tests the stop of the watcher when it receives
// notifications around a deleted volume
func TestVolumeWatch_Delete(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)
	index := uint64(100)
	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 100 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	must.Eq(t, 0, len(watcher.watchers))

	// register an unused volume
	plugin := mock.CSIPlugin()
	vol := mock.CSIVolume(plugin)
	index++
	must.NoError(t, srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol}))

	// assert we get a watcher; there are no claims so it should immediately stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*2, 10*time.Millisecond)

	// write a GC claim to the volume and then immediately delete, to
	// potentially hit the race condition between updates and deletes
	index++
	must.NoError(t, srv.State().CSIVolumeClaim(index, vol.Namespace, vol.ID,
		&structs.CSIVolumeClaim{
			Mode:  structs.CSIVolumeClaimGC,
			State: structs.CSIVolumeClaimStateReadyToFree,
		}))

	index++
	must.NoError(t, srv.State().CSIVolumeDeregister(
		index, vol.Namespace, []string{vol.ID}, false))

	// the watcher should not be running
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second*5, 10*time.Millisecond)
}

// TestVolumeWatch_RegisterDeregister tests the start and stop of
// watchers around registration
func TestVolumeWatch_RegisterDeregister(t *testing.T) {
	ci.Parallel(t)

	srv := &MockStatefulRPCServer{}
	srv.state = state.TestStateStore(t)

	index := uint64(100)

	watcher := NewVolumesWatcher(testlog.HCLogger(t), srv, "")
	watcher.quiescentTimeout = 10 * time.Millisecond

	watcher.SetEnabled(true, srv.State(), "")
	require.Equal(t, 0, len(watcher.watchers))

	plugin := mock.CSIPlugin()
	alloc := mock.Alloc()
	alloc.ClientStatus = structs.AllocClientStatusComplete

	// register a volume without claims
	vol := mock.CSIVolume(plugin)
	index++
	err := srv.State().UpsertCSIVolume(index, []*structs.CSIVolume{vol})
	require.NoError(t, err)

	// watcher should stop
	require.Eventually(t, func() bool {
		watcher.wlock.RLock()
		defer watcher.wlock.RUnlock()
		return len(watcher.watchers) == 0
	}, time.Second, 10*time.Millisecond)
}