github.com/hernad/nomad@v1.6.112/e2e/csi/ebs.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package csi

import (
	"fmt"
	"time"

	"github.com/hernad/nomad/e2e/e2eutil"
	e2e "github.com/hernad/nomad/e2e/e2eutil"
	"github.com/hernad/nomad/e2e/framework"
	"github.com/hernad/nomad/helper/uuid"
	"github.com/hernad/nomad/testutil"
)

// CSIControllerPluginEBSTest exercises the AWS EBS plugin, which is an
// example of a plugin that supports most of the CSI Controller RPCs.
type CSIControllerPluginEBSTest struct {
	framework.TC
	uuid         string
	testJobIDs   []string
	volumeIDs    []string
	pluginJobIDs []string
	nodeIDs      []string
}

const ebsPluginID = "aws-ebs0"

// BeforeAll waits for the cluster to be ready, deploys the CSI plugins, and
// creates two EBS volumes for use in the test.
func (tc *CSIControllerPluginEBSTest) BeforeAll(f *framework.F) {
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 2)

	tc.uuid = uuid.Generate()[0:8]

	// deploy the controller plugin job
	controllerJobID := "aws-ebs-plugin-controller-" + tc.uuid
	f.NoError(e2eutil.Register(controllerJobID, "csi/input/plugin-aws-ebs-controller.nomad"))
	tc.pluginJobIDs = append(tc.pluginJobIDs, controllerJobID)

	f.NoError(e2e.WaitForAllocStatusComparison(
		func() ([]string, error) { return e2e.AllocStatuses(controllerJobID, ns) },
		func(got []string) bool {
			if len(got) != 2 {
				return false
			}
			for _, status := range got {
				if status != "running" {
					return false
				}
			}
			return true
		}, pluginAllocWait,
	), "plugin job should be running")

	// deploy the node plugins job
	nodesJobID := "aws-ebs-plugin-nodes-" + tc.uuid
	f.NoError(e2eutil.Register(nodesJobID, "csi/input/plugin-aws-ebs-nodes.nomad"))
	tc.pluginJobIDs = append(tc.pluginJobIDs, nodesJobID)

	f.NoError(e2eutil.WaitForAllocStatusComparison(
		func() ([]string, error) { return e2eutil.AllocStatuses(nodesJobID, ns) },
		func(got []string) bool {
			for _, status := range got {
				if status != "running" {
					return false
				}
			}
			return true
		}, nil,
	))

	f.NoError(waitForPluginStatusControllerCount(ebsPluginID, 2, pluginWait),
		"aws-ebs0 controller plugins did not become healthy")
	f.NoError(waitForPluginStatusMinNodeCount(ebsPluginID, 2, pluginWait),
		"aws-ebs0 node plugins did not become healthy")

	// ideally we'd wait until after we check `nomad volume status -verbose`
	// to verify these volumes are ready, but the plugin doesn't support the
	// CSI ListVolumes RPC
	volID := "ebs-vol[0]"
	err := volumeRegister(volID, "csi/input/ebs-volume0.hcl", "create")
	requireNoErrorElseDump(f, err, "could not create volume", tc.pluginJobIDs)
	tc.volumeIDs = append(tc.volumeIDs, volID)

	volID = "ebs-vol[1]"
	err = volumeRegister(volID, "csi/input/ebs-volume1.hcl", "create")
	requireNoErrorElseDump(f, err, "could not create volume", tc.pluginJobIDs)
	tc.volumeIDs = append(tc.volumeIDs, volID)
}
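
// For context, the specifications passed to volumeRegister above
// (csi/input/ebs-volume0.hcl and ebs-volume1.hcl) are ordinary Nomad CSI
// volume spec files consumed by `nomad volume create`. A minimal sketch of
// what such a spec contains; the values below are illustrative and not
// copied from the input files:
//
//	id        = "ebs-vol[0]"
//	name      = "idempotency-token"
//	type      = "csi"
//	plugin_id = "aws-ebs0"
//
//	capacity_min = "10GiB"
//	capacity_max = "20GiB"
//
//	capability {
//	  access_mode     = "single-node-writer"
//	  attachment_mode = "file-system"
//	}
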
func (tc *CSIControllerPluginEBSTest) AfterEach(f *framework.F) {

	// Ensure nodes are all restored
	for _, id := range tc.nodeIDs {
		_, err := e2eutil.Command("nomad", "node", "drain", "-disable", "-yes", id)
		f.Assert().NoError(err)
		_, err = e2eutil.Command("nomad", "node", "eligibility", "-enable", id)
		f.Assert().NoError(err)
	}
	tc.nodeIDs = []string{}

	// Stop all jobs in test
	for _, id := range tc.testJobIDs {
		err := e2eutil.StopJob(id, "-purge")
		f.Assert().NoError(err)
	}
	tc.testJobIDs = []string{}

	// Garbage collect
	out, err := e2eutil.Command("nomad", "system", "gc")
	f.Assert().NoError(err, out)
}

// AfterAll cleans up the volumes and plugin jobs created by the test.
func (tc *CSIControllerPluginEBSTest) AfterAll(f *framework.F) {

	for _, volID := range tc.volumeIDs {
		err := waitForVolumeClaimRelease(volID, reapWait)
		f.Assert().NoError(err, "volume claims were not released")

		out, err := e2eutil.Command("nomad", "volume", "delete", volID)
		assertNoErrorElseDump(f, err,
			fmt.Sprintf("could not delete volume:\n%v", out), tc.pluginJobIDs)
	}

	// Deregister all plugin jobs in test
	for _, id := range tc.pluginJobIDs {
		err := e2eutil.StopJob(id, "-purge")
		f.Assert().NoError(err)
	}
	tc.pluginJobIDs = []string{}

	// Garbage collect
	out, err := e2eutil.Command("nomad", "system", "gc")
	f.Assert().NoError(err, out)

}

// TestVolumeClaim exercises the volume publish/unpublish workflows for the
// EBS plugin.
func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) {
	nomadClient := tc.Nomad()

	// deploy a job that writes to the volume
	writeJobID := "write-ebs-" + tc.uuid
	f.NoError(e2eutil.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
	f.NoError(
		e2eutil.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
	f.NoError(err, "could not get allocs for write job")
	f.Len(allocs, 1, "could not get allocs for write job")
	writeAllocID := allocs[0]["ID"]

	// read data from volume and assert the writer wrote a file to it
	expectedPath := "/task/test/" + writeAllocID
	_, err = readFile(nomadClient, writeAllocID, expectedPath)
	f.NoError(err)

	// Shutdown (and purge) the writer so we can run a reader.
	// We could mount the EBS volume with multi-attach, but we
	// want this test to exercise the unpublish workflow.
	err = e2eutil.StopJob(writeJobID, "-purge")
	f.NoError(err)

	// wait for the volume unpublish workflow to complete
	for _, volID := range tc.volumeIDs {
		err := waitForVolumeClaimRelease(volID, reapWait)
		f.NoError(err, "volume claims were not released")
	}

	// deploy a job so we can read from the volume
	readJobID := "read-ebs-" + tc.uuid
	tc.testJobIDs = append(tc.testJobIDs, readJobID) // ensure failed tests clean up
	f.NoError(e2eutil.Register(readJobID, "csi/input/use-ebs-volume.nomad"))
	f.NoError(
		e2eutil.WaitForAllocStatusExpected(readJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err = e2eutil.AllocsForJob(readJobID, ns)
	f.NoError(err, "could not get allocs for read job")
	f.Len(allocs, 1, "could not get allocs for read job")
	readAllocID := allocs[0]["ID"]

	// read data from volume and assert we can read the file the writer wrote
	expectedPath = "/task/test/" + readAllocID
	_, err = readFile(nomadClient, readAllocID, expectedPath)
	f.NoError(err)
}
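
// The write and read jobs above both register csi/input/use-ebs-volume.nomad.
// The authoritative jobspec lives in that file; the sketch below shows one
// plausible shape for its volume wiring and is illustrative rather than
// quoted from the file. In Nomad, per_alloc claims are what make bracketed
// volume IDs such as "ebs-vol[0]" and "ebs-vol[1]" map to allocation indexes:
//
//	group "group" {
//	  volume "test" {
//	    type            = "csi"
//	    source          = "ebs-vol"
//	    per_alloc       = true
//	    access_mode     = "single-node-writer"
//	    attachment_mode = "file-system"
//	  }
//
//	  task "task" {
//	    volume_mount {
//	      volume      = "test"
//	      destination = "/task/test"
//	    }
//	  }
//	}
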
// TestSnapshot exercises the snapshot commands.
func (tc *CSIControllerPluginEBSTest) TestSnapshot(f *framework.F) {

	out, err := e2eutil.Command("nomad", "volume", "snapshot", "create",
		tc.volumeIDs[0], "snap-"+tc.uuid)
	requireNoErrorElseDump(f, err, "could not create volume snapshot", tc.pluginJobIDs)

	snaps, err := e2eutil.ParseColumns(out)

	defer func() {
		_, err := e2eutil.Command("nomad", "volume", "snapshot", "delete",
			ebsPluginID, snaps[0]["Snapshot ID"])
		requireNoErrorElseDump(f, err, "could not delete volume snapshot", tc.pluginJobIDs)
	}()

	f.NoError(err, fmt.Sprintf("could not parse output:\n%v", out))
	f.Len(snaps, 1, fmt.Sprintf("could not parse output:\n%v", out))

	// the snapshot we're looking for should be the first one because
	// we just created it, but give us some breathing room to allow
	// for concurrent test runs
	out, err = e2eutil.Command("nomad", "volume", "snapshot", "list",
		"-plugin", ebsPluginID, "-per-page", "10")
	requireNoErrorElseDump(f, err, "could not list volume snapshots", tc.pluginJobIDs)
	f.Contains(out, snaps[0]["ID"],
		fmt.Sprintf("volume snapshot list did not include expected snapshot:\n%v", out))
}

// TestNodeDrain exercises the remounting behavior in the face of a node drain.
func (tc *CSIControllerPluginEBSTest) TestNodeDrain(f *framework.F) {

	nomadClient := tc.Nomad()

	nodesJobID := "aws-ebs-plugin-nodes-" + tc.uuid
	pluginAllocs, err := e2eutil.AllocsForJob(nodesJobID, ns)
	f.NoError(err)
	expectedHealthyNodePlugins := len(pluginAllocs)

	// deploy a job that writes to the volume
	writeJobID := "write-ebs-for-drain" + tc.uuid
	f.NoError(e2eutil.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
	f.NoError(
		e2eutil.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
		"job should be running")
	tc.testJobIDs = append(tc.testJobIDs, writeJobID) // ensure failed tests clean up

	allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
	f.NoError(err, "could not get allocs for write job")
	f.Len(allocs, 1, "could not get allocs for write job")
	writeAllocID := allocs[0]["ID"]

	// read data from volume and assert the writer wrote a file to it
	expectedPath := "/task/test/" + writeAllocID
	_, err = readFile(nomadClient, writeAllocID, expectedPath)
	f.NoError(err)

	// intentionally set a long deadline so we can check the plugins
	// haven't been moved
	nodeID := allocs[0]["Node ID"]
	out, err := e2eutil.Command("nomad", "node",
		"drain", "-enable",
		"-deadline", "10m",
		"-yes", "-detach", nodeID)
	f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
	tc.nodeIDs = append(tc.nodeIDs, nodeID)

	wc := &e2eutil.WaitConfig{}
	interval, retries := wc.OrDefault()
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(interval)
		allocs, err := e2eutil.AllocsForJob(writeJobID, ns)
		if err != nil {
			return false, err
		}
		for _, alloc := range allocs {
			if alloc["ID"] != writeAllocID {
				if alloc["Status"] == "running" {
					return true, nil
				}
				if alloc["Status"] == "failed" {
					// no point in waiting anymore if we hit this case
					f.T().Fatal("expected replacement alloc not to fail")
				}
			}
		}
		return false, fmt.Errorf("expected replacement alloc to be running")
	}, func(e error) {
		err = e
	})

	pluginAllocs, err = e2eutil.AllocsForJob(nodesJobID, ns)
	f.Lenf(pluginAllocs, expectedHealthyNodePlugins,
		"expected node plugins to be unchanged, got: %v", pluginAllocs)
}
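
// A natural follow-up check for TestNodeDrain (not part of this test as
// written) would be to confirm that the replacement allocation can still read
// the file written before the drain, mirroring the read step in
// TestVolumeClaim. A sketch, where replacementAllocID is a hypothetical
// variable holding the ID of the alloc that replaced writeAllocID:
//
//	_, err = readFile(nomadClient, replacementAllocID, "/task/test/"+writeAllocID)
//	f.NoError(err, "replacement alloc should see data written before the drain")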