github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/e2e/csi/csi.go

package csi

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/hashicorp/nomad/api"
	e2e "github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/testutil"
)

type CSIVolumesTest struct {
	framework.TC
	testJobIDs   []string
	volumeIDs    []string
	pluginJobIDs []string
}

func init() {
	framework.AddSuites(&framework.TestSuite{
		Component:   "CSI",
		CanRunLocal: true,
		Consul:      false,
		Cases: []framework.TestCase{
			new(CSIVolumesTest),
		},
	})
}

const ns = ""

var pluginWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36} // 3min
var reapWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36}   // 3min

func (tc *CSIVolumesTest) BeforeAll(f *framework.F) {
	t := f.T()

	_, err := os.Stat("csi/input/volume-ebs.hcl")
	if err != nil {
		t.Skip("skipping CSI test because EBS volume spec file missing:", err)
	}

	_, err = os.Stat("csi/input/volume-efs.hcl")
	if err != nil {
		t.Skip("skipping CSI test because EFS volume spec file missing:", err)
	}

	// Ensure cluster has leader and at least two client
	// nodes in a ready state before running tests
	e2e.WaitForLeader(t, tc.Nomad())
	e2e.WaitForNodesReady(t, tc.Nomad(), 2)
}

// TestEBSVolumeClaim launches AWS EBS plugins and registers an EBS volume
// as a Nomad CSI volume. We then deploy a job that writes to the volume,
// stop that job, and reuse the volume for another job which should be able
// to read the data written by the first job.
func (tc *CSIVolumesTest) TestEBSVolumeClaim(f *framework.F) {
	t := f.T()
	require := require.New(t)
	nomadClient := tc.Nomad()
	uuid := uuid.Generate()
	pluginID := "aws-ebs0"

	// deploy the controller plugin job
	controllerJobID := "aws-ebs-plugin-controller-" + uuid[0:8]
	f.NoError(e2e.Register(controllerJobID, "csi/input/plugin-aws-ebs-controller.nomad"))
	tc.pluginJobIDs = append(tc.pluginJobIDs, controllerJobID)
	expected := []string{"running", "running"}
	f.NoError(
		e2e.WaitForAllocStatusExpected(controllerJobID, ns, expected),
		"job should be running")

	// deploy the node plugins job
	nodesJobID := "aws-ebs-plugin-nodes-" + uuid[0:8]
	f.NoError(e2e.Register(nodesJobID, "csi/input/plugin-aws-ebs-nodes.nomad"))
	tc.pluginJobIDs = append(tc.pluginJobIDs, nodesJobID)

	// wait for every node plugin allocation to be running
	f.NoError(e2e.WaitForAllocStatusComparison(
		func() ([]string, error) { return e2e.AllocStatuses(nodesJobID, ns) },
		func(got []string) bool {
			for _, status := range got {
				if status != "running" {
					return false
				}
			}
			return true
		}, nil,
	))

	f.NoError(waitForPluginStatusControllerCount(pluginID, 2, pluginWait),
		"aws-ebs0 controller plugins did not become healthy")
	f.NoError(waitForPluginStatusMinNodeCount(pluginID, 2, pluginWait),
		"aws-ebs0 node plugins did not become healthy")

	// register a volume
	// TODO: we don't have a unique ID threaded thru the jobspec yet
	volID := "ebs-vol0"
	err := volumeRegister(volID, "csi/input/volume-ebs.hcl")
	require.NoError(err)
	tc.volumeIDs = append(tc.volumeIDs, volID)

	// deploy a job that writes to the volume
	writeJobID := "write-ebs-" + uuid[0:8]
	f.NoError(e2e.Register(writeJobID, "csi/input/use-ebs-volume.nomad"))
	f.NoError(
		e2e.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err := e2e.AllocsForJob(writeJobID, ns)
	f.NoError(err, "could not get allocs for write job")
	f.Len(allocs, 1, "could not get allocs for write job")
	writeAllocID := allocs[0]["ID"]

	// read data from volume and assert the writer wrote a file to it
	expectedPath := "/task/test/" + writeAllocID
	_, err = readFile(nomadClient, writeAllocID, expectedPath)
	require.NoError(err)

	// Shutdown (and purge) the writer so we can run a reader.
	// We could mount the EBS volume with multi-attach, but we
	// want this test to exercise the unpublish workflow.
	_, err = e2e.Command("nomad", "job", "stop", "-purge", writeJobID)
	require.NoError(err)

	// wait for the volume unpublish workflow to complete
	require.NoError(waitForVolumeClaimRelease(volID, reapWait),
		"write-ebs alloc claim was not released")

	// deploy a job so we can read from the volume
	readJobID := "read-ebs-" + uuid[0:8]
	tc.testJobIDs = append(tc.testJobIDs, readJobID) // ensure failed tests clean up
	f.NoError(e2e.Register(readJobID, "csi/input/use-ebs-volume.nomad"))
	f.NoError(
		e2e.WaitForAllocStatusExpected(readJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err = e2e.AllocsForJob(readJobID, ns)
	f.NoError(err, "could not get allocs for read job")
	f.Len(allocs, 1, "could not get allocs for read job")
	readAllocID := allocs[0]["ID"]

	// read data from volume and assert we can read the file the writer wrote
	expectedPath = "/task/test/" + readAllocID
	_, err = readFile(nomadClient, readAllocID, expectedPath)
	require.NoError(err)

}

// TestEFSVolumeClaim launches AWS EFS plugins and registers an EFS volume
// as a Nomad CSI volume. We then deploy a job that writes to the volume,
// and share the volume with another job which should be able to read the
// data written by the first job.
func (tc *CSIVolumesTest) TestEFSVolumeClaim(f *framework.F) {
	t := f.T()
	require := require.New(t)
	nomadClient := tc.Nomad()
	uuid := uuid.Generate()
	pluginID := "aws-efs0"

	// deploy the node plugins job (no need for a controller for EFS)
	nodesJobID := "aws-efs-plugin-nodes-" + uuid[0:8]
	f.NoError(e2e.Register(nodesJobID, "csi/input/plugin-aws-efs-nodes.nomad"))
	tc.pluginJobIDs = append(tc.pluginJobIDs, nodesJobID)

	// wait for every node plugin allocation to be running
	f.NoError(e2e.WaitForAllocStatusComparison(
		func() ([]string, error) { return e2e.AllocStatuses(nodesJobID, ns) },
		func(got []string) bool {
			for _, status := range got {
				if status != "running" {
					return false
				}
			}
			return true
		}, nil,
	))

	f.NoError(waitForPluginStatusMinNodeCount(pluginID, 2, pluginWait),
		"aws-efs0 node plugins did not become healthy")

	// register a volume
	volID := "efs-vol0"
	err := volumeRegister(volID, "csi/input/volume-efs.hcl")
	require.NoError(err)
	tc.volumeIDs = append(tc.volumeIDs, volID)

	// deploy a job that writes to the volume
	writeJobID := "write-efs-" + uuid[0:8]
	tc.testJobIDs = append(tc.testJobIDs, writeJobID) // ensure failed tests clean up
	f.NoError(e2e.Register(writeJobID, "csi/input/use-efs-volume-write.nomad"))
	f.NoError(
		e2e.WaitForAllocStatusExpected(writeJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err := e2e.AllocsForJob(writeJobID, ns)
	f.NoError(err, "could not get allocs for write job")
	f.Len(allocs, 1, "could not get allocs for write job")
	writeAllocID := allocs[0]["ID"]

	// read data from volume and assert the writer wrote a file to it
	expectedPath := "/task/test/" + writeAllocID
	_, err = readFile(nomadClient, writeAllocID, expectedPath)
	require.NoError(err)

	// Shutdown the writer so we can run a reader.
	// Although EFS should support multiple readers, the plugin
	// does not.
	_, err = e2e.Command("nomad", "job", "stop", writeJobID)
	require.NoError(err)

	// wait for the volume unpublish workflow to complete
	require.NoError(waitForVolumeClaimRelease(volID, reapWait),
		"write-efs alloc claim was not released")

	// deploy a job that reads from the volume
	readJobID := "read-efs-" + uuid[0:8]
	tc.testJobIDs = append(tc.testJobIDs, readJobID) // ensure failed tests clean up
	f.NoError(e2e.Register(readJobID, "csi/input/use-efs-volume-read.nomad"))
	f.NoError(
		e2e.WaitForAllocStatusExpected(readJobID, ns, []string{"running"}),
		"job should be running")

	allocs, err = e2e.AllocsForJob(readJobID, ns)
	f.NoError(err, "could not get allocs for read job")
	f.Len(allocs, 1, "could not get allocs for read job")
	readAllocID := allocs[0]["ID"]

	// read data from volume and assert we can read the file the writer wrote
	require.NoError(err)
	_, err = readFile(nomadClient, readAllocID, expectedPath)
	require.NoError(err)
}

func (tc *CSIVolumesTest) AfterEach(f *framework.F) {

	// Stop all jobs in test
	for _, id := range tc.testJobIDs {
		out, err := e2e.Command("nomad", "job", "stop", "-purge", id)
		f.Assert().NoError(err, out)
	}
	tc.testJobIDs = []string{}

	// Deregister all volumes in test
	for _, id := range tc.volumeIDs {
		// make sure all the test jobs have finished unpublishing claims
		err := waitForVolumeClaimRelease(id, reapWait)
		f.Assert().NoError(err, "volume claims were not released")

		out, err := e2e.Command("nomad", "volume", "deregister", id)
		if err != nil {
			fmt.Println("could not deregister volume, dumping allocation logs")
			f.Assert().NoError(tc.dumpLogs())
		}
		f.Assert().NoError(err, out)
	}
	tc.volumeIDs = []string{}

	// Deregister all plugin jobs in test
	for _, id := range tc.pluginJobIDs {
		out, err := e2e.Command("nomad", "job", "stop", "-purge", id)
		f.Assert().NoError(err, out)
	}
	tc.pluginJobIDs = []string{}

	// Garbage collect
	out, err := e2e.Command("nomad", "system", "gc")
	f.Assert().NoError(err, out)
}

// waitForVolumeClaimRelease makes sure we don't try to re-claim a volume
// that's in the process of being unpublished. We can't just wait for allocs
// to stop, but need to wait for their claims to be released.
func waitForVolumeClaimRelease(volID string, wc *e2e.WaitConfig) error {
	var out string
	var err error
	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
		time.Sleep(wc.Interval)
		out, err = e2e.Command("nomad", "volume", "status", volID)
		if err != nil {
			return false, err
		}
		section, err := e2e.GetSection(out, "Allocations")
		if err != nil {
			return false, err
		}
		return strings.Contains(section, "No allocations placed"), nil
	}, func(e error) {
		if e == nil {
			err = nil
			return
		}
		err = fmt.Errorf("alloc claim was not released: %v\n%s", e, out)
	})
	return err
}

// dumpLogs writes the stderr logs of every plugin allocation either to
// stdout (when running in CI) or to a per-allocation log file.
func (tc *CSIVolumesTest) dumpLogs() error {

	for _, id := range tc.pluginJobIDs {
		allocs, err := e2e.AllocsForJob(id, ns)
		if err != nil {
			return fmt.Errorf("could not find allocs for plugin: %v", err)
		}
		for _, alloc := range allocs {
			allocID := alloc["ID"]
			out, err := e2e.AllocLogs(allocID, e2e.LogsStdErr)
			if err != nil {
				return fmt.Errorf("could not get logs for alloc: %v\n%s", err, out)
			}
			_, isCI := os.LookupEnv("CI")
			if isCI {
				fmt.Println("--------------------------------------")
				fmt.Println("allocation logs:", allocID)
				fmt.Println(out)
				continue
			}
			f, err := os.Create(allocID + ".log")
			if err != nil {
				return fmt.Errorf("could not create log file: %v", err)
			}
			defer f.Close()
			_, err = f.WriteString(out)
			if err != nil {
				return fmt.Errorf("could not write to log file: %v", err)
			}
			fmt.Printf("nomad alloc logs written to %s.log\n", allocID)
		}
	}
	return nil
}

// TODO(tgross): replace this w/ AllocFS().Stat() after
// https://github.com/hashicorp/nomad/issues/7365 is fixed
func readFile(client *api.Client, allocID string, path string) (bytes.Buffer, error) {
	var stdout, stderr bytes.Buffer
	alloc, _, err := client.Allocations().Info(allocID, nil)
	if err != nil {
		return stdout, err
	}
	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelFn()

	_, err = client.Allocations().Exec(ctx,
		alloc, "task", false,
		[]string{"cat", path},
		os.Stdin, &stdout, &stderr,
		make(chan api.TerminalSize), nil)
	return stdout, err
}

// waitForPluginStatusMinNodeCount polls the plugin status until the plugin
// reports at least minCount expected nodes and all expected nodes healthy.
func waitForPluginStatusMinNodeCount(pluginID string, minCount int, wc *e2e.WaitConfig) error {

	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {
		expected, err := e2e.GetField(out, "Nodes Expected")
		if err != nil {
			return false, err
		}
		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
		if err != nil {
			return false, err
		}
		if expectedCount < minCount {
			return false, fmt.Errorf(
				"expected Nodes Expected >= %d, got %q", minCount, expected)
		}
		healthy, err := e2e.GetField(out, "Nodes Healthy")
		if err != nil {
			return false, err
		}
		if healthy != expected {
			return false, fmt.Errorf(
				"expected Nodes Healthy to equal Nodes Expected (%s), got %q", expected, healthy)
		}
		return true, nil
	}, wc)
}

// waitForPluginStatusControllerCount polls the plugin status until the
// plugin reports exactly count expected and healthy controllers.
func waitForPluginStatusControllerCount(pluginID string, count int, wc *e2e.WaitConfig) error {

	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {

		expected, err := e2e.GetField(out, "Controllers Expected")
		if err != nil {
			return false, err
		}
		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
		if err != nil {
			return false, err
		}
		if expectedCount != count {
			return false, fmt.Errorf(
				"expected Controllers Expected = %d, got %d", count, expectedCount)
		}
		healthy, err := e2e.GetField(out, "Controllers Healthy")
		if err != nil {
			return false, err
		}
		healthyCount, err := strconv.Atoi(strings.TrimSpace(healthy))
		if err != nil {
			return false, err
		}
		if healthyCount != count {
			return false, fmt.Errorf(
				"expected Controllers Healthy = %d, got %d", count, healthyCount)
		}
		return true, nil

	}, wc)
}

// waitForPluginStatusCompare polls `nomad plugin status` for pluginID and
// passes the output to compare until it returns true or retries run out.
func waitForPluginStatusCompare(pluginID string, compare func(got string) (bool, error), wc *e2e.WaitConfig) error {
	var err error
	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
		time.Sleep(wc.Interval)
		out, err := e2e.Command("nomad", "plugin", "status", pluginID)
		if err != nil {
			return false, err
		}
		return compare(out)
	}, func(e error) {
		err = fmt.Errorf("plugin status check failed: %v", e)
	})
	return err
}

// volumeRegister registers a volume spec from a file but with a unique ID.
// The caller is responsible for recording that ID for later cleanup.
func volumeRegister(volID, volFilePath string) error {

	cmd := exec.Command("nomad", "volume", "register", "-")
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return fmt.Errorf("could not open stdin?: %w", err)
	}

	content, err := ioutil.ReadFile(volFilePath)
	if err != nil {
		return fmt.Errorf("could not open vol file: %w", err)
	}

	// replace the spec's id line with our unique ID
	re := regexp.MustCompile(`(?m)^id ".*"`)
	volspec := re.ReplaceAllString(string(content),
		fmt.Sprintf("id = \"%s\"", volID))

	go func() {
		defer stdin.Close()
		io.WriteString(stdin, volspec)
	}()

	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("could not register vol: %w\n%v", err, string(out))
	}
	return nil
}
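
// volumeRegisterViaStdin is a hypothetical alternative sketch of
// volumeRegister above; it is not part of the original test suite and the
// function name is introduced here for illustration only. It shows the same
// register-with-unique-ID flow with the command's Stdin assigned directly
// instead of the StdinPipe/goroutine plumbing.
func volumeRegisterViaStdin(volID, volFilePath string) error {
	content, err := ioutil.ReadFile(volFilePath)
	if err != nil {
		return fmt.Errorf("could not open vol file: %w", err)
	}

	// replace the spec's id line with our unique ID, as volumeRegister does
	re := regexp.MustCompile(`(?m)^id ".*"`)
	volspec := re.ReplaceAllString(string(content), fmt.Sprintf("id = %q", volID))

	// feed the rewritten spec to `nomad volume register -` over stdin
	cmd := exec.Command("nomad", "volume", "register", "-")
	cmd.Stdin = strings.NewReader(volspec)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("could not register vol: %w\n%s", err, out)
	}
	return nil
}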