github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/pkg/test/clusterdump/clusterdump.go (about)

     1  // Copyright (c) 2021, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package clusterdump
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"github.com/verrazzano/verrazzano/tests/e2e/pkg"
    10  	"github.com/verrazzano/verrazzano/tests/e2e/pkg/test/framework"
    11  	"os"
    12  	"os/exec"
    13  	"path/filepath"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/onsi/ginkgo/v2"
    18  	"github.com/verrazzano/verrazzano/tests/e2e/pkg/test"
    19  )
    20  
    21  const (
    22  	AnalysisReport = "analysis.report"
    23  	BugReport      = "bug-report.tar.gz"
    24  	FullCluster    = "full-cluster"
    25  	BugReportDir   = "bug-report"
    26  )
    27  
    28  // ClusterDumpWrapper creates cluster snapshots if the test fails (spec or aftersuite)
    29  // A maximum of two cluster snapshots will be generated:
    30  // - snapshot if any spec in the suite fails
    31  // - snapshot if the aftersuite fails
    32  type ClusterDumpWrapper struct {
    33  	failed            bool
    34  	beforeSuitePassed bool
    35  	namespaces        []string
    36  	tf                *framework.TestFramework
    37  }
    38  
    39  func NewClusterDumpWrapper(tf *framework.TestFramework, ns ...string) *ClusterDumpWrapper {
    40  	clusterDump := ClusterDumpWrapper{
    41  		namespaces: ns,
    42  		tf:         tf,
    43  	}
    44  	return &clusterDump
    45  }
    46  
    47  func (c *ClusterDumpWrapper) BeforeSuiteFunc(body func()) func() {
    48  	return c.tf.BeforeSuiteFunc(func() {
    49  		body()
    50  		c.beforeSuitePassed = true
    51  	})
    52  }
    53  
    54  // AfterEach wraps ginkgo.AfterEach
    55  // usage: var _ = c.AfterEach(t, func() { ...after each logic... })
    56  func (c *ClusterDumpWrapper) AfterEach(body func()) bool {
    57  	return c.tf.AfterEach(func() {
    58  		c.failed = c.failed || ginkgo.CurrentSpecReport().Failed()
    59  		body()
    60  	})
    61  }
    62  
    63  // AfterSuiteFunc wraps a function to emit a cluster dump if the suite failed
    64  // usage: var afterSuite = c.AfterSuiteFunc(t, func() { ...after suite logic... })
    65  func (c *ClusterDumpWrapper) AfterSuiteFunc(body func()) func() {
    66  	// Capture full cluster snapshot when environment variable CAPTURE_FULL_CLUSTER is set
    67  	isFullCapture := os.Getenv("CAPTURE_FULL_CLUSTER")
    68  	return c.tf.AfterSuiteFunc(func() {
    69  		if c.failed || !c.beforeSuitePassed {
    70  			dirSuffix := fmt.Sprintf("fail-%d", time.Now().Unix())
    71  			if strings.EqualFold(isFullCapture, "true") {
    72  				executeClusterDumpWithEnvVarSuffix(dirSuffix)
    73  			}
    74  			executeBugReportWithDirectorySuffix(dirSuffix, c.namespaces...)
    75  		}
    76  
    77  		// ginkgo.Fail and gomega matchers panic if they fail. Recover is used to capture the panic and
    78  		// generate the cluster snapshot
    79  		defer func() {
    80  			if r := recover(); r != nil {
    81  				dirSuffix := fmt.Sprintf("aftersuite-%d", time.Now().Unix())
    82  				if strings.EqualFold(isFullCapture, "true") {
    83  					executeClusterDumpWithEnvVarSuffix(dirSuffix)
    84  				}
    85  				executeBugReportWithDirectorySuffix(dirSuffix, c.namespaces...)
    86  			}
    87  		}()
    88  		body()
    89  	})
    90  }
    91  
    92  // executeClusterDump executes the cluster dump tool.
    93  // clusterDumpCommand - The fully qualified cluster dump executable.
    94  // kubeConfig - The kube config file to use when executing the cluster dump tool.
    95  // clusterDumpDirectory - The directory to store the cluster dump within.
    96  func executeClusterDump(clusterDumpCommand string, kubeConfig string, clusterDumpDirectory string) error {
    97  	var cmd *exec.Cmd
    98  	fmt.Printf("Execute cluster dump: KUBECONFIG=%s; %s -d %s\n", kubeConfig, clusterDumpCommand, clusterDumpDirectory)
    99  	if clusterDumpCommand == "" {
   100  		return nil
   101  	}
   102  	reportFile := filepath.Join(clusterDumpDirectory, AnalysisReport)
   103  	if err := os.MkdirAll(clusterDumpDirectory, 0755); err != nil {
   104  		return err
   105  	}
   106  
   107  	cmd = exec.Command(clusterDumpCommand, "-d", clusterDumpDirectory, "-r", reportFile)
   108  	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeConfig))
   109  	cmd.Stdout = os.Stdout
   110  	cmd.Stderr = os.Stderr
   111  	if err := cmd.Start(); err != nil {
   112  		return err
   113  	}
   114  	if err := cmd.Wait(); err != nil {
   115  		return err
   116  	}
   117  
   118  	return nil
   119  }
   120  
   121  // executeBugReport executes the bug-report CLI to capture the cluster resources and analyze on the bug report
   122  // vzCommand - The fully qualified bug report executable.
   123  // kubeConfig - The kube config file to use when executing the bug-report CLI.
   124  // bugReportDirectory - The directory to store the bug report within.
   125  // ns - One or more additional namespaces, from where the resources need to be captured by the bug-report CLI
   126  func executeBugReport(vzCommand string, kubeConfig string, bugReportDirectory string, ns ...string) error {
   127  	var cmd *exec.Cmd
   128  	if vzCommand == "" {
   129  		return nil
   130  	}
   131  
   132  	filename := filepath.Join(bugReportDirectory, BugReport)
   133  	if err := os.MkdirAll(bugReportDirectory, 0755); err != nil {
   134  		return err
   135  	}
   136  
   137  	if len(ns) > 0 {
   138  		includeNS := strings.Join(ns[:], ",")
   139  		cmd = exec.Command(vzCommand, "bug-report", "--report-file", filename, "--include-namespaces", includeNS, "--include-logs")
   140  	} else {
   141  		cmd = exec.Command(vzCommand, "bug-report", "--report-file", filename)
   142  	}
   143  	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeConfig))
   144  	cmd.Stdout = os.Stdout
   145  	cmd.Stderr = os.Stderr
   146  
   147  	if err := cmd.Start(); err != nil {
   148  		fmt.Printf("Failed to start the command bug-report: %v \n", err)
   149  		return err
   150  	}
   151  	if err := cmd.Wait(); err != nil {
   152  		fmt.Printf("Failed waiting for the command bug-report: %v \n", err)
   153  		return err
   154  	}
   155  	// Extract the bug-report and run vz-analyze
   156  	err := analyzeBugReport(kubeConfig, vzCommand, bugReportDirectory)
   157  	if err != nil {
   158  		return err
   159  	}
   160  	return nil
   161  }
   162  
   163  // analyzeBugReport extracts the bug report and runs vz analyze by providing the extracted directory for flag --capture-dir
   164  func analyzeBugReport(kubeConfig, vzCommand, bugReportDirectory string) error {
   165  	bugReportFile := filepath.Join(bugReportDirectory, BugReport)
   166  
   167  	cmd := exec.Command("tar", "-xf", bugReportFile, "-C", bugReportDirectory)
   168  	cmd.Stdout = os.Stdout
   169  	cmd.Stderr = os.Stderr
   170  	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeConfig))
   171  
   172  	if err := cmd.Start(); err != nil {
   173  		fmt.Printf("Failed to start the command to extract the bug report %v \n", err)
   174  		return err
   175  	}
   176  	if err := cmd.Wait(); err != nil {
   177  		fmt.Printf("Failed waiting for the command to extract the bug report %v \n", err)
   178  		return err
   179  	}
   180  
   181  	// Safe to remove bugReportFile
   182  	os.Remove(filepath.Join(bugReportFile))
   183  	reportFile := filepath.Join(bugReportDirectory, AnalysisReport)
   184  	cmd = exec.Command(vzCommand, "analyze", "--capture-dir", bugReportDirectory, "--report-format", "detailed", "--report-file", reportFile)
   185  	if err := cmd.Start(); err != nil {
   186  		fmt.Printf("Failed to start the command analyze %v \n", err)
   187  		return err
   188  	}
   189  	if err := cmd.Wait(); err != nil {
   190  		fmt.Printf("Failed waiting for the command analyze %v \n", err)
   191  		return err
   192  	}
   193  	return nil
   194  }
   195  
   196  func executeClusterDumpWithEnvVarSuffix(directorySuffix string) error {
   197  	kubeConfig := os.Getenv("DUMP_KUBECONFIG")
   198  	clusterDumpDirectory := filepath.Join(os.Getenv("DUMP_DIRECTORY"), directorySuffix, FullCluster)
   199  	clusterDumpCommand := os.Getenv("DUMP_COMMAND")
   200  	return executeClusterDump(clusterDumpCommand, kubeConfig, clusterDumpDirectory)
   201  }
   202  
   203  // executeBugReportWithDirectorySuffix executes the bug-report CLI.
   204  // directorySuffix - The suffix for the directory where the bug-report CLI needs to create the report file.
   205  // ns - One or more additional namespaces, from where the resources need to be captured by the bug-report CLI
   206  func executeBugReportWithDirectorySuffix(directorySuffix string, ns ...string) error {
   207  	kubeConfig := os.Getenv("DUMP_KUBECONFIG")
   208  	bugReportDirectory := filepath.Join(os.Getenv("DUMP_DIRECTORY"), directorySuffix, BugReportDir)
   209  	vzCommand := os.Getenv("VZ_COMMAND")
   210  	return executeBugReport(vzCommand, kubeConfig, bugReportDirectory, ns...)
   211  }
   212  
   213  // ExecuteBugReport executes the cluster bug-report CLI using config from environment variables.
   214  // DUMP_KUBECONFIG - The kube config file to use when executing the bug-report CLI.
   215  // DUMP_DIRECTORY - The directory to store the cluster snapshot within.
   216  // DUMP_COMMAND - The fully qualified cluster snapshot executable.
   217  // One or more additional namespaces specified using ns are set for the flag --include-namespaces
   218  func ExecuteBugReport(ns ...string) error {
   219  	var err1, err2 error
   220  	// Capture full cluster snapshot when environment variable CAPTURE_FULL_CLUSTER is set
   221  	isFullCapture := os.Getenv("CAPTURE_FULL_CLUSTER")
   222  	if strings.EqualFold(isFullCapture, "true") {
   223  		err1 = executeClusterDumpWithEnvVarSuffix("")
   224  	}
   225  	err2 = executeBugReportWithDirectorySuffix("", ns...)
   226  	cumulativeError := ""
   227  	if err1 != nil || err2 != nil {
   228  		if err1 != nil {
   229  			cumulativeError += err1.Error() + ";"
   230  		}
   231  		if err2 != nil {
   232  			cumulativeError += err2.Error() + ";"
   233  		}
   234  		return errors.New(cumulativeError)
   235  	}
   236  	return nil
   237  }
   238  
   239  // CaptureContainerLogs executes a "kubectl cp" command to copy a container's log directories to a local path on disk for examination.
   240  // This utilizes the cluster snapshot directory and KUBECONFIG settings do capture the logs to the same path as the cluster snapshot location;
   241  // the container log directory is copied to DUMP_DIRECTORY/podName.
   242  //
   243  // namespace - The namespace of the target pod
   244  // podName - The name of the pod
   245  // containerName - The target container name within the pod
   246  // containerLogsDir - The logs directory location within the container
   247  //
   248  // DUMP_KUBECONFIG - The kube config file to use when executing the bug-report CLI.
   249  // DUMP_DIRECTORY - The directory to store the cluster snapshot within.
   250  func CaptureContainerLogs(namespace string, podName string, containerName string, containerLogsDir string) {
   251  	directory := os.Getenv(test.DumpDirectoryEnvVarName)
   252  	kubeConfig := os.Getenv(test.DumpKubeconfigEnvVarName)
   253  
   254  	containerPath := fmt.Sprintf("%s/%s:%s", namespace, podName, containerLogsDir)
   255  	destDir := fmt.Sprintf("%s/%s/%s", directory, podName, containerName)
   256  
   257  	cmd := exec.Command("kubectl", "cp", containerPath, "-c", containerName, destDir)
   258  	pkg.Log(pkg.Info, fmt.Sprintf("kubectl command to capture %s logs: %s", podName, cmd.String()))
   259  	cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeConfig))
   260  	cmd.Stdout = os.Stdout
   261  	cmd.Stderr = os.Stderr
   262  	if err := cmd.Start(); err != nil {
   263  		pkg.Log(pkg.Info, fmt.Sprintf("Error START kubectl %s end log copy, err: %s", podName, err))
   264  	}
   265  	if err := cmd.Wait(); err != nil {
   266  		pkg.Log(pkg.Info, fmt.Sprintf("Error WAIT kubectl %s end log copy, err: %s", podName, err))
   267  	}
   268  }