github.com/hernad/nomad@v1.6.112/e2e/csi/csi.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package csi 5 6 import ( 7 "bytes" 8 "context" 9 "fmt" 10 "io" 11 "os" 12 "os/exec" 13 "regexp" 14 "strconv" 15 "strings" 16 "time" 17 18 "github.com/hernad/nomad/api" 19 e2e "github.com/hernad/nomad/e2e/e2eutil" 20 "github.com/hernad/nomad/e2e/framework" 21 "github.com/hernad/nomad/helper/uuid" 22 "github.com/hernad/nomad/testutil" 23 ) 24 25 func init() { 26 framework.AddSuites(&framework.TestSuite{ 27 Component: "CSI", 28 CanRunLocal: true, 29 Consul: false, 30 Cases: []framework.TestCase{ 31 new(CSIControllerPluginEBSTest), // see ebs.go 32 new(CSINodeOnlyPluginEFSTest), // see efs.go 33 }, 34 }) 35 } 36 37 const ns = "" 38 39 var pluginAllocWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 12} // 1min 40 var pluginWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36} // 3min 41 var reapWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36} // 3min 42 43 // assertNoErrorElseDump calls a non-halting assert on the error and dumps the 44 // plugin logs if it fails. 45 func assertNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) { 46 if err != nil { 47 dumpLogs(pluginJobIDs) 48 f.Assert().NoError(err, fmt.Sprintf("%v: %v", msg, err)) 49 } 50 } 51 52 // requireNoErrorElseDump calls a halting assert on the error and dumps the 53 // plugin logs if it fails. 54 func requireNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) { 55 if err != nil { 56 dumpLogs(pluginJobIDs) 57 f.NoError(err, fmt.Sprintf("%v: %v", msg, err)) 58 } 59 } 60 61 func dumpLogs(pluginIDs []string) error { 62 63 for _, id := range pluginIDs { 64 allocs, err := e2e.AllocsForJob(id, ns) 65 if err != nil { 66 return fmt.Errorf("could not find allocs for plugin: %v", err) 67 } 68 for _, alloc := range allocs { 69 allocID := alloc["ID"] 70 out, err := e2e.AllocLogs(allocID, "", e2e.LogsStdErr) 71 if err != nil { 72 return fmt.Errorf("could not get logs for alloc: %v\n%s", err, out) 73 } 74 _, isCI := os.LookupEnv("CI") 75 if isCI { 76 fmt.Println("--------------------------------------") 77 fmt.Println("allocation logs:", allocID) 78 fmt.Println(out) 79 continue 80 } 81 f, err := os.Create(allocID + ".log") 82 if err != nil { 83 return fmt.Errorf("could not create log file: %v", err) 84 } 85 defer f.Close() 86 _, err = f.WriteString(out) 87 if err != nil { 88 return fmt.Errorf("could not write to log file: %v", err) 89 } 90 fmt.Printf("nomad alloc logs written to %s.log\n", allocID) 91 } 92 } 93 return nil 94 } 95 96 // waitForVolumeClaimRelease makes sure we don't try to re-claim a volume 97 // that's in the process of being unpublished. we can't just wait for allocs 98 // to stop, but need to wait for their claims to be released 99 func waitForVolumeClaimRelease(volID string, wc *e2e.WaitConfig) error { 100 var out string 101 var err error 102 testutil.WaitForResultRetries(wc.Retries, func() (bool, error) { 103 time.Sleep(wc.Interval) 104 out, err = e2e.Command("nomad", "volume", "status", volID) 105 if err != nil { 106 return false, err 107 } 108 section, err := e2e.GetSection(out, "Allocations") 109 if err != nil { 110 return false, err 111 } 112 return strings.Contains(section, "No allocations placed"), nil 113 }, func(e error) { 114 if e == nil { 115 err = nil 116 } 117 err = fmt.Errorf("alloc claim was not released: %v\n%s", e, out) 118 }) 119 return err 120 } 121 122 // TODO(tgross): replace this w/ AllocFS().Stat() after 123 // https://github.com/hernad/nomad/issues/7365 is fixed 124 func readFile(client *api.Client, allocID string, path string) (bytes.Buffer, error) { 125 var stdout, stderr bytes.Buffer 126 alloc, _, err := client.Allocations().Info(allocID, nil) 127 if err != nil { 128 return stdout, err 129 } 130 ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second) 131 defer cancelFn() 132 133 _, err = client.Allocations().Exec(ctx, 134 alloc, "task", false, 135 []string{"cat", path}, 136 os.Stdin, &stdout, &stderr, 137 make(chan api.TerminalSize), nil) 138 return stdout, err 139 } 140 141 func waitForPluginStatusMinNodeCount(pluginID string, minCount int, wc *e2e.WaitConfig) error { 142 143 return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) { 144 expected, err := e2e.GetField(out, "Nodes Expected") 145 if err != nil { 146 return false, err 147 } 148 expectedCount, err := strconv.Atoi(strings.TrimSpace(expected)) 149 if err != nil { 150 return false, err 151 } 152 if expectedCount < minCount { 153 return false, fmt.Errorf( 154 "expected Nodes Expected >= %d, got %q", minCount, expected) 155 } 156 healthy, err := e2e.GetField(out, "Nodes Healthy") 157 if err != nil { 158 return false, err 159 } 160 if healthy != expected { 161 return false, fmt.Errorf( 162 "expected Nodes Healthy >= %d, got %q", minCount, healthy) 163 } 164 return true, nil 165 }, wc) 166 } 167 168 func waitForPluginStatusControllerCount(pluginID string, count int, wc *e2e.WaitConfig) error { 169 170 return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) { 171 172 expected, err := e2e.GetField(out, "Controllers Expected") 173 if err != nil { 174 return false, err 175 } 176 expectedCount, err := strconv.Atoi(strings.TrimSpace(expected)) 177 if err != nil { 178 return false, err 179 } 180 if expectedCount != count { 181 return false, fmt.Errorf( 182 "expected Controllers Expected = %d, got %d", count, expectedCount) 183 } 184 healthy, err := e2e.GetField(out, "Controllers Healthy") 185 if err != nil { 186 return false, err 187 } 188 healthyCount, err := strconv.Atoi(strings.TrimSpace(healthy)) 189 if err != nil { 190 return false, err 191 } 192 if healthyCount != count { 193 return false, fmt.Errorf( 194 "expected Controllers Healthy = %d, got %d", count, healthyCount) 195 } 196 return true, nil 197 198 }, wc) 199 } 200 201 func waitForPluginStatusCompare(pluginID string, compare func(got string) (bool, error), wc *e2e.WaitConfig) error { 202 var err error 203 testutil.WaitForResultRetries(wc.Retries, func() (bool, error) { 204 time.Sleep(wc.Interval) 205 out, err := e2e.Command("nomad", "plugin", "status", pluginID) 206 if err != nil { 207 return false, err 208 } 209 return compare(out) 210 }, func(e error) { 211 err = fmt.Errorf("plugin status check failed: %v", e) 212 }) 213 return err 214 } 215 216 // volumeRegister creates or registers a volume spec from a file but with a 217 // unique ID. The caller is responsible for recording that ID for later 218 // cleanup. 219 func volumeRegister(volID, volFilePath, createOrRegister string) error { 220 221 // a CSI RPC to create a volume can take a long time because we 222 // have to wait on the AWS API to provision a disk, but a register 223 // should not because it only has to check the API for compatibility 224 timeout := time.Second * 30 225 if createOrRegister == "create" { 226 timeout = time.Minute * 2 227 } 228 ctx, cancel := context.WithTimeout(context.Background(), timeout) 229 defer cancel() 230 231 cmd := exec.CommandContext(ctx, "nomad", "volume", createOrRegister, "-") 232 stdin, err := cmd.StdinPipe() 233 if err != nil { 234 return fmt.Errorf("could not open stdin?: %w", err) 235 } 236 237 content, err := os.ReadFile(volFilePath) 238 if err != nil { 239 return fmt.Errorf("could not open vol file: %w", err) 240 } 241 242 // hack off the first line to replace with our unique ID 243 var idRegex = regexp.MustCompile(`(?m)^id[\s]+= ".*"`) 244 volspec := idRegex.ReplaceAllString(string(content), 245 fmt.Sprintf("id = %q", volID)) 246 247 // the EBS plugin uses the name as an idempotency token across the 248 // whole AWS account, so it has to be globally unique 249 var nameRegex = regexp.MustCompile(`(?m)^name[\s]+= ".*"`) 250 volspec = nameRegex.ReplaceAllString(volspec, 251 fmt.Sprintf("name = %q", uuid.Generate())) 252 253 go func() { 254 defer stdin.Close() 255 io.WriteString(stdin, volspec) 256 }() 257 258 out, err := cmd.CombinedOutput() 259 if err != nil { 260 return fmt.Errorf("could not register vol: %w\n%v", err, string(out)) 261 } 262 return nil 263 }