k8s.io/kubernetes@v1.29.3/test/e2e_node/remote/remote.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package remote 18 19 import ( 20 "flag" 21 "fmt" 22 "io" 23 "os" 24 "os/exec" 25 "path/filepath" 26 "regexp" 27 "strings" 28 "time" 29 30 utilerrors "k8s.io/apimachinery/pkg/util/errors" 31 "k8s.io/klog/v2" 32 ) 33 34 var testTimeout = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.") 35 var resultsDir = flag.String("results-dir", "/tmp/", "Directory to scp test results to.") 36 37 const archiveName = "e2e_node_test.tar.gz" 38 39 func copyKubeletConfigIfExists(kubeletConfigFile, dstDir string) error { 40 srcStat, err := os.Stat(kubeletConfigFile) 41 if err != nil { 42 if os.IsNotExist(err) { 43 return nil 44 } else { 45 return err 46 } 47 } 48 49 if !srcStat.Mode().IsRegular() { 50 return fmt.Errorf("%s is not a regular file", kubeletConfigFile) 51 } 52 53 source, err := os.Open(kubeletConfigFile) 54 if err != nil { 55 return err 56 } 57 defer source.Close() 58 59 dst := filepath.Join(dstDir, "kubeletconfig.yaml") 60 destination, err := os.Create(dst) 61 if err != nil { 62 return err 63 } 64 defer destination.Close() 65 66 _, err = io.Copy(destination, source) 67 return err 68 } 69 70 // CreateTestArchive creates the archive package for the node e2e test. 71 func CreateTestArchive(suite TestSuite, systemSpecName, kubeletConfigFile string) (string, error) { 72 klog.V(2).Infof("Building archive...") 73 tardir, err := os.MkdirTemp("", "node-e2e-archive") 74 if err != nil { 75 return "", fmt.Errorf("failed to create temporary directory %v", err) 76 } 77 defer os.RemoveAll(tardir) 78 79 err = copyKubeletConfigIfExists(kubeletConfigFile, tardir) 80 if err != nil { 81 return "", fmt.Errorf("failed to copy kubelet config: %w", err) 82 } 83 84 // Call the suite function to setup the test package. 85 err = suite.SetupTestPackage(tardir, systemSpecName) 86 if err != nil { 87 return "", fmt.Errorf("failed to setup test package %q: %w", tardir, err) 88 } 89 90 // Build the tar 91 out, err := exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput() 92 if err != nil { 93 return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out) 94 } 95 96 dir, err := os.Getwd() 97 if err != nil { 98 return "", fmt.Errorf("failed to get working directory %v", err) 99 } 100 return filepath.Join(dir, archiveName), nil 101 } 102 103 // RunRemote returns the command Output, whether the exit was ok, and any errors 104 type RunRemoteConfig struct { 105 Suite TestSuite 106 Archive string 107 Host string 108 Cleanup bool 109 ImageDesc, JunitFileName, TestArgs, GinkgoArgs, SystemSpecName, ExtraEnvs, RuntimeConfig string 110 } 111 112 func RunRemote(cfg RunRemoteConfig) (string, bool, error) { 113 // Create the temp staging directory 114 klog.V(2).Infof("Staging test binaries on %q", cfg.Host) 115 workspace := newWorkspaceDir() 116 // Do not sudo here, so that we can use scp to copy test archive to the directory. 117 if output, err := SSHNoSudo(cfg.Host, "mkdir", workspace); err != nil { 118 // Exit failure with the error 119 return "", false, fmt.Errorf("failed to create workspace directory %q on Host %q: %v Output: %q", workspace, cfg.Host, err, output) 120 } 121 if cfg.Cleanup { 122 defer func() { 123 output, err := SSH(cfg.Host, "rm", "-rf", workspace) 124 if err != nil { 125 klog.Errorf("failed to cleanup workspace %q on Host %q: %v. Output:\n%s", workspace, cfg.Host, err, output) 126 } 127 }() 128 } 129 130 // Copy the archive to the staging directory 131 if output, err := runSSHCommand(cfg.Host, "scp", cfg.Archive, fmt.Sprintf("%s:%s/", GetHostnameOrIP(cfg.Host), workspace)); err != nil { 132 // Exit failure with the error 133 return "", false, fmt.Errorf("failed to copy test archive: %v, Output: %q", err, output) 134 } 135 136 // Extract the archive 137 cmd := getSSHCommand(" && ", 138 fmt.Sprintf("cd %s", workspace), 139 fmt.Sprintf("tar -xzvf ./%s", archiveName), 140 ) 141 klog.V(2).Infof("Extracting tar on %q", cfg.Host) 142 // Do not use sudo here, because `sudo tar -x` will recover the file ownership inside the tar ball, but 143 // we want the extracted files to be owned by the current user. 144 if output, err := SSHNoSudo(cfg.Host, "sh", "-c", cmd); err != nil { 145 // Exit failure with the error 146 return "", false, fmt.Errorf("failed to extract test archive: %v, Output: %q", err, output) 147 } 148 149 // Create the test result directory. 150 resultDir := filepath.Join(workspace, "results") 151 if output, err := SSHNoSudo(cfg.Host, "mkdir", resultDir); err != nil { 152 // Exit failure with the error 153 return "", false, fmt.Errorf("failed to create test result directory %q on Host %q: %v Output: %q", resultDir, cfg.Host, err, output) 154 } 155 156 klog.V(2).Infof("Running test on %q", cfg.Host) 157 output, err := cfg.Suite.RunTest(cfg.Host, workspace, resultDir, cfg.ImageDesc, cfg.JunitFileName, cfg.TestArgs, 158 cfg.GinkgoArgs, cfg.SystemSpecName, cfg.ExtraEnvs, cfg.RuntimeConfig, *testTimeout) 159 160 var aggErrs []error 161 // Do not log the Output here, let the caller deal with the test Output. 162 if err != nil { 163 aggErrs = append(aggErrs, err) 164 collectSystemLog(cfg.Host) 165 } 166 167 klog.V(2).Infof("Copying test artifacts from %q", cfg.Host) 168 scpErr := getTestArtifacts(cfg.Host, workspace) 169 if scpErr != nil { 170 aggErrs = append(aggErrs, scpErr) 171 } 172 173 return output, len(aggErrs) == 0, utilerrors.NewAggregate(aggErrs) 174 } 175 176 const ( 177 // workspaceDirPrefix is the string prefix used in the workspace directory name. 178 workspaceDirPrefix = "node-e2e-" 179 // timestampFormat is the timestamp format used in the node e2e directory name. 180 timestampFormat = "20060102T150405" 181 ) 182 183 func getTimestamp() string { 184 return fmt.Sprint(time.Now().Format(timestampFormat)) 185 } 186 187 func newWorkspaceDir() string { 188 return filepath.Join("/tmp", workspaceDirPrefix+getTimestamp()) 189 } 190 191 // GetTimestampFromWorkspaceDir parses the workspace directory name and gets the timestamp part of it. 192 // This can later be used to name other artifacts (such as the 193 // kubelet-${instance}.service systemd transient service used to launch 194 // Kubelet) so that they can be matched to each other. 195 func GetTimestampFromWorkspaceDir(dir string) string { 196 dirTimestamp := strings.TrimPrefix(filepath.Base(dir), workspaceDirPrefix) 197 re := regexp.MustCompile("^\\d{8}T\\d{6}$") 198 if re.MatchString(dirTimestamp) { 199 return dirTimestamp 200 } 201 // Fallback: if we can't find that timestamp, default to using Now() 202 return getTimestamp() 203 } 204 205 func getTestArtifacts(host, testDir string) error { 206 logPath := filepath.Join(*resultsDir, host) 207 if err := os.MkdirAll(logPath, 0755); err != nil { 208 return fmt.Errorf("failed to create log directory %q: %w", logPath, err) 209 } 210 // Copy logs (if any) to artifacts/hostname 211 if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.log", testDir)); err == nil { 212 if _, err := runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:%s/results/*.log", GetHostnameOrIP(host), testDir), logPath); err != nil { 213 return err 214 } 215 } 216 // Copy json files (if any) to artifacts. 217 if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil { 218 if _, err = runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIP(host), testDir), *resultsDir); err != nil { 219 return err 220 } 221 } 222 // Copy junit results (if any) to artifacts 223 if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/junit*", testDir)); err == nil { 224 // Copy junit (if any) to the top of artifacts 225 if _, err = runSSHCommand(host, "scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIP(host), testDir), *resultsDir); err != nil { 226 return err 227 } 228 } 229 // Copy container logs to artifacts/hostname 230 if _, err := SSH(host, "chmod", "-R", "a+r", "/var/log/pods"); err == nil { 231 if _, err = runSSHCommand(host, "scp", "-r", fmt.Sprintf("%s:/var/log/pods/", GetHostnameOrIP(host)), logPath); err != nil { 232 return err 233 } 234 } 235 return nil 236 } 237 238 // collectSystemLog is a temporary hack to collect system log when encountered on 239 // unexpected error. 240 func collectSystemLog(host string) { 241 // Encountered an unexpected error. The remote test harness may not 242 // have finished retrieved and stored all the logs in this case. Try 243 // to get some logs for debugging purposes. 244 // TODO: This is a best-effort, temporary hack that only works for 245 // journald nodes. We should have a more robust way to collect logs. 246 var ( 247 logName = "system.log" 248 logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName) 249 destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName) 250 ) 251 klog.V(2).Infof("Test failed unexpectedly. Attempting to retrieving system logs (only works for nodes with journald)") 252 // Try getting the system logs from journald and store it to a file. 253 // Don't reuse the original test directory on the remote host because 254 // it could've be been removed if the node was rebooted. 255 if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil { 256 klog.V(2).Infof("Got the system logs from journald; copying it back...") 257 if output, err := runSSHCommand(host, "scp", fmt.Sprintf("%s:%s", GetHostnameOrIP(host), logPath), destPath); err != nil { 258 klog.V(2).Infof("Failed to copy the log: err: %v, output: %q", err, output) 259 } 260 } else { 261 klog.V(2).Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output) 262 } 263 } 264 265 // WriteLog is a temporary function to make it possible to write log 266 // in the runner. This is used to collect serial console log. 267 // TODO(random-liu): Use the log-dump script in cluster e2e. 268 func WriteLog(host, filename, content string) error { 269 logPath := filepath.Join(*resultsDir, host) 270 if err := os.MkdirAll(logPath, 0755); err != nil { 271 return fmt.Errorf("failed to create log directory %q: %w", logPath, err) 272 } 273 f, err := os.Create(filepath.Join(logPath, filename)) 274 if err != nil { 275 return err 276 } 277 defer f.Close() 278 _, err = f.WriteString(content) 279 return err 280 }