k8s.io/kubernetes@v1.29.3/test/e2e_node/remote/remote.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package remote
    18  
    19  import (
    20  	"flag"
    21  	"fmt"
    22  	"io"
    23  	"os"
    24  	"os/exec"
    25  	"path/filepath"
    26  	"regexp"
    27  	"strings"
    28  	"time"
    29  
    30  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    31  	"k8s.io/klog/v2"
    32  )
    33  
    34  var testTimeout = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.")
    35  var resultsDir = flag.String("results-dir", "/tmp/", "Directory to scp test results to.")
    36  
    37  const archiveName = "e2e_node_test.tar.gz"
    38  
    39  func copyKubeletConfigIfExists(kubeletConfigFile, dstDir string) error {
    40  	srcStat, err := os.Stat(kubeletConfigFile)
    41  	if err != nil {
    42  		if os.IsNotExist(err) {
    43  			return nil
    44  		} else {
    45  			return err
    46  		}
    47  	}
    48  
    49  	if !srcStat.Mode().IsRegular() {
    50  		return fmt.Errorf("%s is not a regular file", kubeletConfigFile)
    51  	}
    52  
    53  	source, err := os.Open(kubeletConfigFile)
    54  	if err != nil {
    55  		return err
    56  	}
    57  	defer source.Close()
    58  
    59  	dst := filepath.Join(dstDir, "kubeletconfig.yaml")
    60  	destination, err := os.Create(dst)
    61  	if err != nil {
    62  		return err
    63  	}
    64  	defer destination.Close()
    65  
    66  	_, err = io.Copy(destination, source)
    67  	return err
    68  }
    69  
    70  // CreateTestArchive creates the archive package for the node e2e test.
    71  func CreateTestArchive(suite TestSuite, systemSpecName, kubeletConfigFile string) (string, error) {
    72  	klog.V(2).Infof("Building archive...")
    73  	tardir, err := os.MkdirTemp("", "node-e2e-archive")
    74  	if err != nil {
    75  		return "", fmt.Errorf("failed to create temporary directory %v", err)
    76  	}
    77  	defer os.RemoveAll(tardir)
    78  
    79  	err = copyKubeletConfigIfExists(kubeletConfigFile, tardir)
    80  	if err != nil {
    81  		return "", fmt.Errorf("failed to copy kubelet config: %w", err)
    82  	}
    83  
    84  	// Call the suite function to setup the test package.
    85  	err = suite.SetupTestPackage(tardir, systemSpecName)
    86  	if err != nil {
    87  		return "", fmt.Errorf("failed to setup test package %q: %w", tardir, err)
    88  	}
    89  
    90  	// Build the tar
    91  	out, err := exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput()
    92  	if err != nil {
    93  		return "", fmt.Errorf("failed to build tar %v.  Output:\n%s", err, out)
    94  	}
    95  
    96  	dir, err := os.Getwd()
    97  	if err != nil {
    98  		return "", fmt.Errorf("failed to get working directory %v", err)
    99  	}
   100  	return filepath.Join(dir, archiveName), nil
   101  }
   102  
   103  // RunRemote returns the command Output, whether the exit was ok, and any errors
   104  type RunRemoteConfig struct {
   105  	Suite                                                                                    TestSuite
   106  	Archive                                                                                  string
   107  	Host                                                                                     string
   108  	Cleanup                                                                                  bool
   109  	ImageDesc, JunitFileName, TestArgs, GinkgoArgs, SystemSpecName, ExtraEnvs, RuntimeConfig string
   110  }
   111  
   112  func RunRemote(cfg RunRemoteConfig) (string, bool, error) {
   113  	// Create the temp staging directory
   114  	klog.V(2).Infof("Staging test binaries on %q", cfg.Host)
   115  	workspace := newWorkspaceDir()
   116  	// Do not sudo here, so that we can use scp to copy test archive to the directory.
   117  	if output, err := SSHNoSudo(cfg.Host, "mkdir", workspace); err != nil {
   118  		// Exit failure with the error
   119  		return "", false, fmt.Errorf("failed to create workspace directory %q on Host %q: %v Output: %q", workspace, cfg.Host, err, output)
   120  	}
   121  	if cfg.Cleanup {
   122  		defer func() {
   123  			output, err := SSH(cfg.Host, "rm", "-rf", workspace)
   124  			if err != nil {
   125  				klog.Errorf("failed to cleanup workspace %q on Host %q: %v.  Output:\n%s", workspace, cfg.Host, err, output)
   126  			}
   127  		}()
   128  	}
   129  
   130  	// Copy the archive to the staging directory
   131  	if output, err := runSSHCommand(cfg.Host, "scp", cfg.Archive, fmt.Sprintf("%s:%s/", GetHostnameOrIP(cfg.Host), workspace)); err != nil {
   132  		// Exit failure with the error
   133  		return "", false, fmt.Errorf("failed to copy test archive: %v, Output: %q", err, output)
   134  	}
   135  
   136  	// Extract the archive
   137  	cmd := getSSHCommand(" && ",
   138  		fmt.Sprintf("cd %s", workspace),
   139  		fmt.Sprintf("tar -xzvf ./%s", archiveName),
   140  	)
   141  	klog.V(2).Infof("Extracting tar on %q", cfg.Host)
   142  	// Do not use sudo here, because `sudo tar -x` will recover the file ownership inside the tar ball, but
   143  	// we want the extracted files to be owned by the current user.
   144  	if output, err := SSHNoSudo(cfg.Host, "sh", "-c", cmd); err != nil {
   145  		// Exit failure with the error
   146  		return "", false, fmt.Errorf("failed to extract test archive: %v, Output: %q", err, output)
   147  	}
   148  
   149  	// Create the test result directory.
   150  	resultDir := filepath.Join(workspace, "results")
   151  	if output, err := SSHNoSudo(cfg.Host, "mkdir", resultDir); err != nil {
   152  		// Exit failure with the error
   153  		return "", false, fmt.Errorf("failed to create test result directory %q on Host %q: %v Output: %q", resultDir, cfg.Host, err, output)
   154  	}
   155  
   156  	klog.V(2).Infof("Running test on %q", cfg.Host)
   157  	output, err := cfg.Suite.RunTest(cfg.Host, workspace, resultDir, cfg.ImageDesc, cfg.JunitFileName, cfg.TestArgs,
   158  		cfg.GinkgoArgs, cfg.SystemSpecName, cfg.ExtraEnvs, cfg.RuntimeConfig, *testTimeout)
   159  
   160  	var aggErrs []error
   161  	// Do not log the Output here, let the caller deal with the test Output.
   162  	if err != nil {
   163  		aggErrs = append(aggErrs, err)
   164  		collectSystemLog(cfg.Host)
   165  	}
   166  
   167  	klog.V(2).Infof("Copying test artifacts from %q", cfg.Host)
   168  	scpErr := getTestArtifacts(cfg.Host, workspace)
   169  	if scpErr != nil {
   170  		aggErrs = append(aggErrs, scpErr)
   171  	}
   172  
   173  	return output, len(aggErrs) == 0, utilerrors.NewAggregate(aggErrs)
   174  }
   175  
   176  const (
   177  	// workspaceDirPrefix is the string prefix used in the workspace directory name.
   178  	workspaceDirPrefix = "node-e2e-"
   179  	// timestampFormat is the timestamp format used in the node e2e directory name.
   180  	timestampFormat = "20060102T150405"
   181  )
   182  
   183  func getTimestamp() string {
   184  	return fmt.Sprint(time.Now().Format(timestampFormat))
   185  }
   186  
   187  func newWorkspaceDir() string {
   188  	return filepath.Join("/tmp", workspaceDirPrefix+getTimestamp())
   189  }
   190  
   191  // GetTimestampFromWorkspaceDir parses the workspace directory name and gets the timestamp part of it.
   192  // This can later be used to name other artifacts (such as the
   193  // kubelet-${instance}.service systemd transient service used to launch
   194  // Kubelet) so that they can be matched to each other.
   195  func GetTimestampFromWorkspaceDir(dir string) string {
   196  	dirTimestamp := strings.TrimPrefix(filepath.Base(dir), workspaceDirPrefix)
   197  	re := regexp.MustCompile("^\\d{8}T\\d{6}$")
   198  	if re.MatchString(dirTimestamp) {
   199  		return dirTimestamp
   200  	}
   201  	// Fallback: if we can't find that timestamp, default to using Now()
   202  	return getTimestamp()
   203  }
   204  
   205  func getTestArtifacts(host, testDir string) error {
   206  	logPath := filepath.Join(*resultsDir, host)
   207  	if err := os.MkdirAll(logPath, 0755); err != nil {
   208  		return fmt.Errorf("failed to create log directory %q: %w", logPath, err)
   209  	}
   210  	// Copy logs (if any) to artifacts/hostname
   211  	if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.log", testDir)); err == nil {
   212  		if _, err := runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:%s/results/*.log", GetHostnameOrIP(host), testDir), logPath); err != nil {
   213  			return err
   214  		}
   215  	}
   216  	// Copy json files (if any) to artifacts.
   217  	if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
   218  		if _, err = runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIP(host), testDir), *resultsDir); err != nil {
   219  			return err
   220  		}
   221  	}
   222  	// Copy junit results (if any) to artifacts
   223  	if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/junit*", testDir)); err == nil {
   224  		// Copy junit (if any) to the top of artifacts
   225  		if _, err = runSSHCommand(host, "scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIP(host), testDir), *resultsDir); err != nil {
   226  			return err
   227  		}
   228  	}
   229  	// Copy container logs to artifacts/hostname
   230  	if _, err := SSH(host, "chmod", "-R", "a+r", "/var/log/pods"); err == nil {
   231  		if _, err = runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:/var/log/pods/", GetHostnameOrIP(host)), logPath); err != nil {
   232  			return err
   233  		}
   234  	}
   235  	return nil
   236  }
   237  
   238  // collectSystemLog is a temporary hack to collect system log when encountered on
   239  // unexpected error.
   240  func collectSystemLog(host string) {
   241  	// Encountered an unexpected error. The remote test harness may not
   242  	// have finished retrieved and stored all the logs in this case. Try
   243  	// to get some logs for debugging purposes.
   244  	// TODO: This is a best-effort, temporary hack that only works for
   245  	// journald nodes. We should have a more robust way to collect logs.
   246  	var (
   247  		logName  = "system.log"
   248  		logPath  = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName)
   249  		destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName)
   250  	)
   251  	klog.V(2).Infof("Test failed unexpectedly. Attempting to retrieving system logs (only works for nodes with journald)")
   252  	// Try getting the system logs from journald and store it to a file.
   253  	// Don't reuse the original test directory on the remote host because
   254  	// it could've be been removed if the node was rebooted.
   255  	if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil {
   256  		klog.V(2).Infof("Got the system logs from journald; copying it back...")
   257  		if output, err := runSSHCommand(host, "scp", fmt.Sprintf("%s:%s", GetHostnameOrIP(host), logPath), destPath); err != nil {
   258  			klog.V(2).Infof("Failed to copy the log: err: %v, output: %q", err, output)
   259  		}
   260  	} else {
   261  		klog.V(2).Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output)
   262  	}
   263  }
   264  
   265  // WriteLog is a temporary function to make it possible to write log
   266  // in the runner. This is used to collect serial console log.
   267  // TODO(random-liu): Use the log-dump script in cluster e2e.
   268  func WriteLog(host, filename, content string) error {
   269  	logPath := filepath.Join(*resultsDir, host)
   270  	if err := os.MkdirAll(logPath, 0755); err != nil {
   271  		return fmt.Errorf("failed to create log directory %q: %w", logPath, err)
   272  	}
   273  	f, err := os.Create(filepath.Join(logPath, filename))
   274  	if err != nil {
   275  		return err
   276  	}
   277  	defer f.Close()
   278  	_, err = f.WriteString(content)
   279  	return err
   280  }