github.com/hernad/nomad@v1.6.112/e2e/csi/csi.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package csi
     5  
     6  import (
     7  	"bytes"
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"os/exec"
    13  	"regexp"
    14  	"strconv"
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/hernad/nomad/api"
    19  	e2e "github.com/hernad/nomad/e2e/e2eutil"
    20  	"github.com/hernad/nomad/e2e/framework"
    21  	"github.com/hernad/nomad/helper/uuid"
    22  	"github.com/hernad/nomad/testutil"
    23  )
    24  
    25  func init() {
    26  	framework.AddSuites(&framework.TestSuite{
    27  		Component:   "CSI",
    28  		CanRunLocal: true,
    29  		Consul:      false,
    30  		Cases: []framework.TestCase{
    31  			new(CSIControllerPluginEBSTest), // see ebs.go
    32  			new(CSINodeOnlyPluginEFSTest),   // see efs.go
    33  		},
    34  	})
    35  }
    36  
    37  const ns = ""
    38  
    39  var pluginAllocWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 12} // 1min
    40  var pluginWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36}      // 3min
    41  var reapWait = &e2e.WaitConfig{Interval: 5 * time.Second, Retries: 36}        // 3min
    42  
    43  // assertNoErrorElseDump calls a non-halting assert on the error and dumps the
    44  // plugin logs if it fails.
    45  func assertNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) {
    46  	if err != nil {
    47  		dumpLogs(pluginJobIDs)
    48  		f.Assert().NoError(err, fmt.Sprintf("%v: %v", msg, err))
    49  	}
    50  }
    51  
    52  // requireNoErrorElseDump calls a halting assert on the error and dumps the
    53  // plugin logs if it fails.
    54  func requireNoErrorElseDump(f *framework.F, err error, msg string, pluginJobIDs []string) {
    55  	if err != nil {
    56  		dumpLogs(pluginJobIDs)
    57  		f.NoError(err, fmt.Sprintf("%v: %v", msg, err))
    58  	}
    59  }
    60  
    61  func dumpLogs(pluginIDs []string) error {
    62  
    63  	for _, id := range pluginIDs {
    64  		allocs, err := e2e.AllocsForJob(id, ns)
    65  		if err != nil {
    66  			return fmt.Errorf("could not find allocs for plugin: %v", err)
    67  		}
    68  		for _, alloc := range allocs {
    69  			allocID := alloc["ID"]
    70  			out, err := e2e.AllocLogs(allocID, "", e2e.LogsStdErr)
    71  			if err != nil {
    72  				return fmt.Errorf("could not get logs for alloc: %v\n%s", err, out)
    73  			}
    74  			_, isCI := os.LookupEnv("CI")
    75  			if isCI {
    76  				fmt.Println("--------------------------------------")
    77  				fmt.Println("allocation logs:", allocID)
    78  				fmt.Println(out)
    79  				continue
    80  			}
    81  			f, err := os.Create(allocID + ".log")
    82  			if err != nil {
    83  				return fmt.Errorf("could not create log file: %v", err)
    84  			}
    85  			defer f.Close()
    86  			_, err = f.WriteString(out)
    87  			if err != nil {
    88  				return fmt.Errorf("could not write to log file: %v", err)
    89  			}
    90  			fmt.Printf("nomad alloc logs written to %s.log\n", allocID)
    91  		}
    92  	}
    93  	return nil
    94  }
    95  
    96  // waitForVolumeClaimRelease makes sure we don't try to re-claim a volume
    97  // that's in the process of being unpublished. we can't just wait for allocs
    98  // to stop, but need to wait for their claims to be released
    99  func waitForVolumeClaimRelease(volID string, wc *e2e.WaitConfig) error {
   100  	var out string
   101  	var err error
   102  	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
   103  		time.Sleep(wc.Interval)
   104  		out, err = e2e.Command("nomad", "volume", "status", volID)
   105  		if err != nil {
   106  			return false, err
   107  		}
   108  		section, err := e2e.GetSection(out, "Allocations")
   109  		if err != nil {
   110  			return false, err
   111  		}
   112  		return strings.Contains(section, "No allocations placed"), nil
   113  	}, func(e error) {
   114  		if e == nil {
   115  			err = nil
   116  		}
   117  		err = fmt.Errorf("alloc claim was not released: %v\n%s", e, out)
   118  	})
   119  	return err
   120  }
   121  
   122  // TODO(tgross): replace this w/ AllocFS().Stat() after
   123  // https://github.com/hernad/nomad/issues/7365 is fixed
   124  func readFile(client *api.Client, allocID string, path string) (bytes.Buffer, error) {
   125  	var stdout, stderr bytes.Buffer
   126  	alloc, _, err := client.Allocations().Info(allocID, nil)
   127  	if err != nil {
   128  		return stdout, err
   129  	}
   130  	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
   131  	defer cancelFn()
   132  
   133  	_, err = client.Allocations().Exec(ctx,
   134  		alloc, "task", false,
   135  		[]string{"cat", path},
   136  		os.Stdin, &stdout, &stderr,
   137  		make(chan api.TerminalSize), nil)
   138  	return stdout, err
   139  }
   140  
   141  func waitForPluginStatusMinNodeCount(pluginID string, minCount int, wc *e2e.WaitConfig) error {
   142  
   143  	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {
   144  		expected, err := e2e.GetField(out, "Nodes Expected")
   145  		if err != nil {
   146  			return false, err
   147  		}
   148  		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
   149  		if err != nil {
   150  			return false, err
   151  		}
   152  		if expectedCount < minCount {
   153  			return false, fmt.Errorf(
   154  				"expected Nodes Expected >= %d, got %q", minCount, expected)
   155  		}
   156  		healthy, err := e2e.GetField(out, "Nodes Healthy")
   157  		if err != nil {
   158  			return false, err
   159  		}
   160  		if healthy != expected {
   161  			return false, fmt.Errorf(
   162  				"expected Nodes Healthy >= %d, got %q", minCount, healthy)
   163  		}
   164  		return true, nil
   165  	}, wc)
   166  }
   167  
   168  func waitForPluginStatusControllerCount(pluginID string, count int, wc *e2e.WaitConfig) error {
   169  
   170  	return waitForPluginStatusCompare(pluginID, func(out string) (bool, error) {
   171  
   172  		expected, err := e2e.GetField(out, "Controllers Expected")
   173  		if err != nil {
   174  			return false, err
   175  		}
   176  		expectedCount, err := strconv.Atoi(strings.TrimSpace(expected))
   177  		if err != nil {
   178  			return false, err
   179  		}
   180  		if expectedCount != count {
   181  			return false, fmt.Errorf(
   182  				"expected Controllers Expected = %d, got %d", count, expectedCount)
   183  		}
   184  		healthy, err := e2e.GetField(out, "Controllers Healthy")
   185  		if err != nil {
   186  			return false, err
   187  		}
   188  		healthyCount, err := strconv.Atoi(strings.TrimSpace(healthy))
   189  		if err != nil {
   190  			return false, err
   191  		}
   192  		if healthyCount != count {
   193  			return false, fmt.Errorf(
   194  				"expected Controllers Healthy = %d, got %d", count, healthyCount)
   195  		}
   196  		return true, nil
   197  
   198  	}, wc)
   199  }
   200  
   201  func waitForPluginStatusCompare(pluginID string, compare func(got string) (bool, error), wc *e2e.WaitConfig) error {
   202  	var err error
   203  	testutil.WaitForResultRetries(wc.Retries, func() (bool, error) {
   204  		time.Sleep(wc.Interval)
   205  		out, err := e2e.Command("nomad", "plugin", "status", pluginID)
   206  		if err != nil {
   207  			return false, err
   208  		}
   209  		return compare(out)
   210  	}, func(e error) {
   211  		err = fmt.Errorf("plugin status check failed: %v", e)
   212  	})
   213  	return err
   214  }
   215  
   216  // volumeRegister creates or registers a volume spec from a file but with a
   217  // unique ID. The caller is responsible for recording that ID for later
   218  // cleanup.
   219  func volumeRegister(volID, volFilePath, createOrRegister string) error {
   220  
   221  	// a CSI RPC to create a volume can take a long time because we
   222  	// have to wait on the AWS API to provision a disk, but a register
   223  	// should not because it only has to check the API for compatibility
   224  	timeout := time.Second * 30
   225  	if createOrRegister == "create" {
   226  		timeout = time.Minute * 2
   227  	}
   228  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   229  	defer cancel()
   230  
   231  	cmd := exec.CommandContext(ctx, "nomad", "volume", createOrRegister, "-")
   232  	stdin, err := cmd.StdinPipe()
   233  	if err != nil {
   234  		return fmt.Errorf("could not open stdin?: %w", err)
   235  	}
   236  
   237  	content, err := os.ReadFile(volFilePath)
   238  	if err != nil {
   239  		return fmt.Errorf("could not open vol file: %w", err)
   240  	}
   241  
   242  	// hack off the first line to replace with our unique ID
   243  	var idRegex = regexp.MustCompile(`(?m)^id[\s]+= ".*"`)
   244  	volspec := idRegex.ReplaceAllString(string(content),
   245  		fmt.Sprintf("id = %q", volID))
   246  
   247  	// the EBS plugin uses the name as an idempotency token across the
   248  	// whole AWS account, so it has to be globally unique
   249  	var nameRegex = regexp.MustCompile(`(?m)^name[\s]+= ".*"`)
   250  	volspec = nameRegex.ReplaceAllString(volspec,
   251  		fmt.Sprintf("name = %q", uuid.Generate()))
   252  
   253  	go func() {
   254  		defer stdin.Close()
   255  		io.WriteString(stdin, volspec)
   256  	}()
   257  
   258  	out, err := cmd.CombinedOutput()
   259  	if err != nil {
   260  		return fmt.Errorf("could not register vol: %w\n%v", err, string(out))
   261  	}
   262  	return nil
   263  }