github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/csi/csi.go (about)

     1  package csi
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"os"
    10  	"os/exec"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/hashicorp/nomad/api"
    17  	e2e "github.com/hashicorp/nomad/e2e/e2eutil"
    18  	"github.com/hashicorp/nomad/e2e/framework"
    19  	"github.com/hashicorp/nomad/helper/uuid"
    20  	"github.com/hashicorp/nomad/testutil"
    21  )
    22  
    23  func init() {
    24  	framework.AddSuites(&framework.TestSuite{
    25  		Component:   "CSI",
    26  		CanRunLocal: true,
    27  		Consul:      false,
    28  		Cases: []framework.TestCase{
    29  			new(CSIControllerPluginEBSTest), // see ebs.go
    30  			new(CSINodeOnlyPluginEFSTest),   // see efs.go
    31  		},
    32  	})
    33  }
    34  
    35  const ns = ""
    36  
    37  var pluginAllocWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 12} // 1min
    38  var pluginWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36}      // 3min
    39  var reapWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36}        // 3min
    40  
    41  // assertNoErrorElseDump calls a non-halting assert on the error and dumps the
    42  // plugin logs if it fails.
    43  func assertNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) {
    44  	if err != nil {
    45  		dumpLogs(pluginJobIDs)
    46  		f.Assert().NoError(err, fmt.Sprintf("%v: %v", msg, err))
    47  	}
    48  }
    49  
    50  // requireNoErrorElseDump calls a halting assert on the error and dumps the
    51  // plugin logs if it fails.
    52  func requireNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) {
    53  	if err != nil {
    54  		dumpLogs(pluginJobIDs)
    55  		f.NoError(err, fmt.Sprintf("%v: %v", msg, err))
    56  	}
    57  }
    58  
    59  func dumpLogs(pluginIDs []string) error {
    60  
    61  	for _, id := range pluginIDs {
    62  		allocs, err := e2e.AllocsForJob(id, ns)
    63  		if err != nil {
    64  			return fmt.Errorf("could not find allocs for plugin: %v", err)
    65  		}
    66  		for _, alloc := range allocs {
    67  			allocID := alloc["ID"]
    68  			out, err := e2e.AllocLogs(allocID, e2e.LogsStdErr)
    69  			if err != nil {
    70  				return fmt.Errorf("could not get logs for alloc: %v\n%s", err, out)
    71  			}
    72  			_, isCI := os.LookupEnv("CI")
    73  			if isCI {
    74  				fmt.Println("--------------------------------------")
    75  				fmt.Println("allocation logs:", allocID)
    76  				fmt.Println(out)
    77  				continue
    78  			}
    79  			f, err := os.Create(allocID + ".log")
    80  			if err != nil {
    81  				return fmt.Errorf("could not create log file: %v", err)
    82  			}
    83  			defer f.Close()
    84  			_, err = f.WriteString(out)
    85  			if err != nil {
    86  				return fmt.Errorf("could not write to log file: %v", err)
    87  			}
    88  			fmt.Printf("nomad alloc logs written to %s.log\n", allocID)
    89  		}
    90  	}
    91  	return nil
    92  }
    93  
    94  // waitForVolumeClaimRelease makes sure we don't try to re-claim a volume
    95  // that's in the process of being unpublished. we can't just wait for allocs
    96  // to stop, but need to wait for their claims to be released
    97  func waitForVolumeClaimRelease(volID string, wc *e2e.WaitConfig) error {
    98  	var out string
    99  	var err error
   100  	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
   101  		time.Sleep(wc.Interval)
   102  		out, err = e2e.Command("nomad", "volume", "status", volID)
   103  		if err != nil {
   104  			return false, err
   105  		}
   106  		section, err := e2e.GetSection(out, "Allocations")
   107  		if err != nil {
   108  			return false, err
   109  		}
   110  		return strings.Contains(section, "No allocations placed"), nil
   111  	}, func(e error) {
   112  		if e == nil {
   113  			err = nil
   114  		}
   115  		err = fmt.Errorf("alloc claim was not released: %v\n%s", e, out)
   116  	})
   117  	return err
   118  }
   119  
   120  // TODO(tgross): replace this w/ AllocFS().Stat() after
   121  // https://github.com/hashicorp/nomad/issues/7365 is fixed
   122  func readFile(client *api.Client, allocID string, path string) (bytes.Buffer, error) {
   123  	var stdout, stderr bytes.Buffer
   124  	alloc, _, err := client.Allocations().Info(allocID, nil)
   125  	if err != nil {
   126  		return stdout, err
   127  	}
   128  	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
   129  	defer cancelFn()
   130  
   131  	_, err = client.Allocations().Exec(ctx,
   132  		alloc, "task", false,
   133  		[]string{"cat", path},
   134  		os.Stdin, &stdout, &stderr,
   135  		make(chan api.TerminalSize), nil)
   136  	return stdout, err
   137  }
   138  
   139  func waitForPluginStatusMinNodeCount(pluginID string, minCount int, wc *e2e.WaitConfig) error {
   140  
   141  	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {
   142  		expected, err := e2e.GetField(out, "Nodes Expected")
   143  		if err != nil {
   144  			return false, err
   145  		}
   146  		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
   147  		if err != nil {
   148  			return false, err
   149  		}
   150  		if expectedCount < minCount {
   151  			return false, fmt.Errorf(
   152  				"expected Nodes Expected >= %d, got %q", minCount, expected)
   153  		}
   154  		healthy, err := e2e.GetField(out, "Nodes Healthy")
   155  		if err != nil {
   156  			return false, err
   157  		}
   158  		if healthy != expected {
   159  			return false, fmt.Errorf(
   160  				"expected Nodes Healthy >= %d, got %q", minCount, healthy)
   161  		}
   162  		return true, nil
   163  	}, wc)
   164  }
   165  
   166  func waitForPluginStatusControllerCount(pluginID string, count int, wc *e2e.WaitConfig) error {
   167  
   168  	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {
   169  
   170  		expected, err := e2e.GetField(out, "Controllers Expected")
   171  		if err != nil {
   172  			return false, err
   173  		}
   174  		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
   175  		if err != nil {
   176  			return false, err
   177  		}
   178  		if expectedCount != count {
   179  			return false, fmt.Errorf(
   180  				"expected Controllers Expected = %d, got %d", count, expectedCount)
   181  		}
   182  		healthy, err := e2e.GetField(out, "Controllers Healthy")
   183  		if err != nil {
   184  			return false, err
   185  		}
   186  		healthyCount, err := strconv.Atoi(strings.TrimSpace(healthy))
   187  		if err != nil {
   188  			return false, err
   189  		}
   190  		if healthyCount != count {
   191  			return false, fmt.Errorf(
   192  				"expected Controllers Healthy = %d, got %d", count, healthyCount)
   193  		}
   194  		return true, nil
   195  
   196  	}, wc)
   197  }
   198  
   199  func waitForPluginStatusCompare(pluginID string, compare func(got string) (bool, error), wc *e2e.WaitConfig) error {
   200  	var err error
   201  	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
   202  		time.Sleep(wc.Interval)
   203  		out, err := e2e.Command("nomad", "plugin", "status", pluginID)
   204  		if err != nil {
   205  			return false, err
   206  		}
   207  		return compare(out)
   208  	}, func(e error) {
   209  		err = fmt.Errorf("plugin status check failed: %v", e)
   210  	})
   211  	return err
   212  }
   213  
   214  // volumeRegister creates or registers a volume spec from a file but with a
   215  // unique ID. The caller is responsible for recording that ID for later
   216  // cleanup.
   217  func volumeRegister(volID, volFilePath, createOrRegister string) error {
   218  
   219  	// a CSI RPC to create a volume can take a long time because we
   220  	// have to wait on the AWS API to provision a disk, but a register
   221  	// should not because it only has to check the API for compatibility
   222  	timeout := time.Second * 30
   223  	if createOrRegister == "create" {
   224  		timeout = time.Minute * 2
   225  	}
   226  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   227  	defer cancel()
   228  
   229  	cmd := exec.CommandContext(ctx, "nomad", "volume", createOrRegister, "-")
   230  	stdin, err := cmd.StdinPipe()
   231  	if err != nil {
   232  		return fmt.Errorf("could not open stdin?: %w", err)
   233  	}
   234  
   235  	content, err := ioutil.ReadFile(volFilePath)
   236  	if err != nil {
   237  		return fmt.Errorf("could not open vol file: %w", err)
   238  	}
   239  
   240  	// hack off the first line to replace with our unique ID
   241  	var idRegex = regexp.MustCompile(`(?m)^id[\s]+= ".*"`)
   242  	volspec := idRegex.ReplaceAllString(string(content),
   243  		fmt.Sprintf("id = %q", volID))
   244  
   245  	// the EBS plugin uses the name as an idempotency token across the
   246  	// whole AWS account, so it has to be globally unique
   247  	var nameRegex = regexp.MustCompile(`(?m)^name[\s]+= ".*"`)
   248  	volspec = nameRegex.ReplaceAllString(volspec,
   249  		fmt.Sprintf("name = %q", uuid.Generate()))
   250  
   251  	go func() {
   252  		defer stdin.Close()
   253  		io.WriteString(stdin, volspec)
   254  	}()
   255  
   256  	out, err := cmd.CombinedOutput()
   257  	if err != nil {
   258  		return fmt.Errorf("could not register vol: %w\n%v", err, string(out))
   259  	}
   260  	return nil
   261  }