k8s.io/kubernetes@v1.29.3/test/e2e_node/e2e_node_suite_test.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2016 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 // To run tests in this suite 21 // NOTE: This test suite requires password-less sudo capabilities to run the kubelet and kube-apiserver. 22 package e2enode 23 24 import ( 25 "bytes" 26 "context" 27 "encoding/json" 28 "flag" 29 "fmt" 30 31 "math/rand" 32 "os" 33 "os/exec" 34 "syscall" 35 "testing" 36 "time" 37 38 v1 "k8s.io/api/core/v1" 39 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 40 utilyaml "k8s.io/apimachinery/pkg/util/yaml" 41 utilfeature "k8s.io/apiserver/pkg/util/feature" 42 clientset "k8s.io/client-go/kubernetes" 43 cliflag "k8s.io/component-base/cli/flag" 44 "k8s.io/component-base/logs" 45 "k8s.io/kubernetes/pkg/util/rlimit" 46 commontest "k8s.io/kubernetes/test/e2e/common" 47 "k8s.io/kubernetes/test/e2e/framework" 48 e2econfig "k8s.io/kubernetes/test/e2e/framework/config" 49 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 50 e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" 51 e2etestingmanifests "k8s.io/kubernetes/test/e2e/testing-manifests" 52 "k8s.io/kubernetes/test/e2e_node/services" 53 e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests" 54 system "k8s.io/system-validators/validators" 55 56 // define and freeze constants 57 _ "k8s.io/kubernetes/test/e2e/feature" 58 _ "k8s.io/kubernetes/test/e2e/nodefeature" 59 60 // reconfigure framework 61 _ "k8s.io/kubernetes/test/e2e/framework/debug/init" 62 _ "k8s.io/kubernetes/test/e2e/framework/metrics/init" 63 _ "k8s.io/kubernetes/test/e2e/framework/node/init" 64 _ "k8s.io/kubernetes/test/utils/format" 65 66 "github.com/onsi/ginkgo/v2" 67 "github.com/onsi/gomega" 68 "github.com/spf13/pflag" 69 "k8s.io/klog/v2" 70 ) 71 72 var ( 73 e2es *services.E2EServices 74 // featureGates is a map of feature names to bools that enable or disable alpha/experimental features. 75 featureGates map[string]bool 76 // serviceFeatureGates is a map of feature names to bools that enable or 77 // disable alpha/experimental features for API service. 78 serviceFeatureGates map[string]bool 79 80 // TODO(random-liu): Change the following modes to sub-command. 81 runServicesMode = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.") 82 runKubeletMode = flag.Bool("run-kubelet-mode", false, "If true, only start kubelet, and not run test.") 83 systemValidateMode = flag.Bool("system-validate-mode", false, "If true, only run system validation in current process, and not run test.") 84 systemSpecFile = flag.String("system-spec-file", "", "The name of the system spec file that will be used for node conformance test. If it's unspecified or empty, the default system spec (system.DefaultSysSpec) will be used.") 85 ) 86 87 // registerNodeFlags registers flags specific to the node e2e test suite. 88 func registerNodeFlags(flags *flag.FlagSet) { 89 // Mark the test as node e2e when node flags are api.Registry. 90 framework.TestContext.NodeE2E = true 91 flags.StringVar(&framework.TestContext.BearerToken, "bearer-token", "", "The bearer token to authenticate with. If not specified, it would be a random token. Currently this token is only used in node e2e tests.") 92 flags.StringVar(&framework.TestContext.NodeName, "node-name", "", "Name of the node to run tests on.") 93 flags.StringVar(&framework.TestContext.KubeletConfigDropinDir, "config-dir", "", "Path to a directory containing drop-in configurations for the kubelet.") 94 // TODO(random-liu): Move kubelet start logic out of the test. 95 // TODO(random-liu): Move log fetch logic out of the test. 96 // There are different ways to start kubelet (systemd, initd, docker, manually started etc.) 97 // and manage logs (journald, upstart etc.). 98 // For different situation we need to mount different things into the container, run different commands. 99 // It is hard and unnecessary to deal with the complexity inside the test suite. 100 flags.BoolVar(&framework.TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.") 101 flags.BoolVar(&framework.TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.") 102 flags.BoolVar(&framework.TestContext.RestartKubelet, "restart-kubelet", false, "If true, restart Kubelet unit when the process is killed.") 103 flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.") 104 flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.") 105 flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2") 106 flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.") 107 flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.") 108 flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.") 109 flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.") 110 flags.Var(cliflag.NewMapStringBool(&featureGates), "feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features.") 111 flags.Var(cliflag.NewMapStringBool(&serviceFeatureGates), "service-feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features for API service.") 112 flags.BoolVar(&framework.TestContext.StandaloneMode, "standalone-mode", false, "If true, starts kubelet in standalone mode.") 113 } 114 115 func init() { 116 // Enable embedded FS file lookup as fallback 117 e2etestfiles.AddFileSource(e2etestingmanifests.GetE2ETestingManifestsFS()) 118 e2etestfiles.AddFileSource(e2enodetestingmanifests.GetE2ENodeTestingManifestsFS()) 119 } 120 121 func TestMain(m *testing.M) { 122 // Copy go flags in TestMain, to ensure go test flags are registered (no longer available in init() as of go1.13) 123 e2econfig.CopyFlags(e2econfig.Flags, flag.CommandLine) 124 framework.RegisterCommonFlags(flag.CommandLine) 125 registerNodeFlags(flag.CommandLine) 126 logs.AddFlags(pflag.CommandLine) 127 pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 128 // Mark the run-services-mode flag as hidden to prevent user from using it. 129 pflag.CommandLine.MarkHidden("run-services-mode") 130 // It's weird that if I directly use pflag in TestContext, it will report error. 131 // It seems that someone is using flag.Parse() after init() and TestMain(). 132 // TODO(random-liu): Find who is using flag.Parse() and cause errors and move the following logic 133 // into TestContext. 134 // TODO(pohly): remove RegisterNodeFlags from test_context.go enable Viper config support here? 135 136 rand.Seed(time.Now().UnixNano()) 137 pflag.Parse() 138 if pflag.CommandLine.NArg() > 0 { 139 fmt.Fprintf(os.Stderr, "unknown additional command line arguments: %s", pflag.CommandLine.Args()) 140 os.Exit(1) 141 } 142 framework.AfterReadingAllFlags(&framework.TestContext) 143 if err := e2eskipper.InitFeatureGates(utilfeature.DefaultFeatureGate, featureGates); err != nil { 144 fmt.Fprintf(os.Stderr, "ERROR: initialize feature gates: %v", err) 145 os.Exit(1) 146 } 147 148 if err := services.SetFeatureGatesForInProcessComponents(serviceFeatureGates); err != nil { 149 fmt.Fprintf(os.Stderr, "ERROR: initialize process feature gates for API service: %v", err) 150 os.Exit(1) 151 } 152 153 setExtraEnvs() 154 os.Exit(m.Run()) 155 } 156 157 // When running the containerized conformance test, we'll mount the 158 // host root filesystem as readonly to /rootfs. 159 const rootfs = "/rootfs" 160 161 func TestE2eNode(t *testing.T) { 162 // Make sure we are not limited by sshd when it comes to open files 163 if err := rlimit.SetNumFiles(1000000); err != nil { 164 klog.Infof("failed to set rlimit on max file handles: %v", err) 165 } 166 167 if *runServicesMode { 168 // If run-services-mode is specified, only run services in current process. 169 services.RunE2EServices(t) 170 return 171 } 172 if *runKubeletMode { 173 // If run-kubelet-mode is specified, only start kubelet. 174 services.RunKubelet(featureGates) 175 return 176 } 177 if *systemValidateMode { 178 // If system-validate-mode is specified, only run system validation in current process. 179 spec := &system.DefaultSysSpec 180 if *systemSpecFile != "" { 181 var err error 182 spec, err = loadSystemSpecFromFile(*systemSpecFile) 183 if err != nil { 184 klog.Exitf("Failed to load system spec: %v", err) 185 } 186 } 187 if framework.TestContext.NodeConformance { 188 // Chroot to /rootfs to make system validation can check system 189 // as in the root filesystem. 190 // TODO(random-liu): Consider to chroot the whole test process to make writing 191 // test easier. 192 if err := syscall.Chroot(rootfs); err != nil { 193 klog.Exitf("chroot %q failed: %v", rootfs, err) 194 } 195 } 196 if _, err := system.ValidateSpec(*spec, "remote"); len(err) != 0 { 197 klog.Exitf("system validation failed: %v", err) 198 } 199 return 200 } 201 202 // We're not running in a special mode so lets run tests. 203 gomega.RegisterFailHandler(ginkgo.Fail) 204 // Initialize the KubeletConfigDropinDir again if the test doesn't run in run-kubelet-mode. 205 if framework.TestContext.KubeletConfigDropinDir == "" { 206 var err error 207 framework.TestContext.KubeletConfigDropinDir, err = services.KubeletConfigDirCWDDir() 208 if err != nil { 209 klog.Errorf("failed to create kubelet config directory: %v", err) 210 } 211 } 212 reportDir := framework.TestContext.ReportDir 213 if reportDir != "" { 214 // Create the directory if it doesn't already exist 215 // NOTE: junit report can be simply created by executing your tests with the new --junit-report flags instead. 216 if err := os.MkdirAll(reportDir, 0755); err != nil { 217 klog.Errorf("Failed creating report directory: %v", err) 218 } 219 } 220 suiteConfig, reporterConfig := framework.CreateGinkgoConfig() 221 ginkgo.RunSpecs(t, "E2eNode Suite", suiteConfig, reporterConfig) 222 } 223 224 // Setup the kubelet on the node 225 var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte { 226 // Run system validation test. 227 gomega.Expect(validateSystem()).To(gomega.Succeed(), "system validation") 228 229 // Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue 230 // This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling. 231 if framework.TestContext.PrepullImages { 232 klog.Infof("Pre-pulling images so that they are cached for the tests.") 233 updateImageAllowList(ctx) 234 err := PrePullAllImages() 235 gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) 236 } 237 238 // TODO(yifan): Temporary workaround to disable coreos from auto restart 239 // by masking the locksmithd. 240 // We should mask locksmithd when provisioning the machine. 241 maskLocksmithdOnCoreos() 242 243 if *startServices { 244 // If the services are expected to stop after test, they should monitor the test process. 245 // If the services are expected to keep running after test, they should not monitor the test process. 246 e2es = services.NewE2EServices(*stopServices) 247 gomega.Expect(e2es.Start(featureGates)).To(gomega.Succeed(), "should be able to start node services.") 248 } else { 249 klog.Infof("Running tests without starting services.") 250 } 251 252 if !framework.TestContext.StandaloneMode { 253 klog.Infof("Wait for the node to be ready") 254 waitForNodeReady(ctx) 255 } 256 257 // Reference common test to make the import valid. 258 commontest.CurrentSuite = commontest.NodeE2E 259 260 // ginkgo would spawn multiple processes to run tests. 261 // Since the bearer token is generated randomly at run time, 262 // we need to distribute the bearer token to other processes to make them use the same token. 263 return []byte(framework.TestContext.BearerToken) 264 }, func(ctx context.Context, token []byte) { 265 framework.TestContext.BearerToken = string(token) 266 // update test context with node configuration. 267 gomega.Expect(updateTestContext(ctx)).To(gomega.Succeed(), "update test context with node config.") 268 269 // Store current Kubelet configuration in the package variable 270 // This assumes all tests which dynamically change kubelet configuration 271 // must: 1) run in serial; 2) restore kubelet configuration after test. 272 var err error 273 kubeletCfg, err = getCurrentKubeletConfig(ctx) 274 framework.ExpectNoError(err) 275 }) 276 277 // Tear down the kubelet on the node 278 var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() { 279 if e2es != nil { 280 if *startServices && *stopServices { 281 klog.Infof("Stopping node services...") 282 e2es.Stop() 283 } 284 } 285 286 klog.Infof("Tests Finished") 287 }) 288 289 // validateSystem runs system validation in a separate process and returns error if validation fails. 290 func validateSystem() error { 291 testBin, err := os.Executable() 292 if err != nil { 293 return fmt.Errorf("can't get current binary: %w", err) 294 } 295 // Pass all flags into the child process, so that it will see the same flag set. 296 output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput() 297 // The output of system validation should have been formatted, directly print here. 298 fmt.Print(string(output)) 299 if err != nil { 300 return fmt.Errorf("system validation failed: %w", err) 301 } 302 return nil 303 } 304 305 func maskLocksmithdOnCoreos() { 306 data, err := os.ReadFile("/etc/os-release") 307 if err != nil { 308 // Not all distros contain this file. 309 klog.Infof("Could not read /etc/os-release: %v", err) 310 return 311 } 312 if bytes.Contains(data, []byte("ID=coreos")) { 313 output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput() 314 framework.ExpectNoError(err, fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output))) 315 klog.Infof("Locksmithd is masked successfully") 316 } 317 } 318 319 func waitForNodeReady(ctx context.Context) { 320 const ( 321 // nodeReadyTimeout is the time to wait for node to become ready. 322 nodeReadyTimeout = 2 * time.Minute 323 // nodeReadyPollInterval is the interval to check node ready. 324 nodeReadyPollInterval = 1 * time.Second 325 ) 326 client, err := getAPIServerClient() 327 framework.ExpectNoError(err, "should be able to get apiserver client.") 328 gomega.Eventually(ctx, func() error { 329 node, err := getNode(client) 330 if err != nil { 331 return fmt.Errorf("failed to get node: %w", err) 332 } 333 if !isNodeReady(node) { 334 return fmt.Errorf("node is not ready: %+v", node) 335 } 336 return nil 337 }, nodeReadyTimeout, nodeReadyPollInterval).Should(gomega.Succeed()) 338 } 339 340 // updateTestContext updates the test context with the node name. 341 func updateTestContext(ctx context.Context) error { 342 setExtraEnvs() 343 updateImageAllowList(ctx) 344 345 client, err := getAPIServerClient() 346 if err != nil { 347 return fmt.Errorf("failed to get apiserver client: %w", err) 348 } 349 350 if !framework.TestContext.StandaloneMode { 351 // Update test context with current node object. 352 node, err := getNode(client) 353 if err != nil { 354 return fmt.Errorf("failed to get node: %w", err) 355 } 356 framework.TestContext.NodeName = node.Name // Set node name from API server, it is already set to the computer name by default. 357 } 358 359 framework.Logf("Node name: %s", framework.TestContext.NodeName) 360 361 return nil 362 } 363 364 // getNode gets node object from the apiserver. 365 func getNode(c *clientset.Clientset) (*v1.Node, error) { 366 nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 367 framework.ExpectNoError(err, "should be able to list nodes.") 368 if nodes == nil { 369 return nil, fmt.Errorf("the node list is nil") 370 } 371 gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically("<=", 1), "the number of nodes is more than 1.") 372 if len(nodes.Items) == 0 { 373 return nil, fmt.Errorf("empty node list: %+v", nodes) 374 } 375 return &nodes.Items[0], nil 376 } 377 378 // getAPIServerClient gets a apiserver client. 379 func getAPIServerClient() (*clientset.Clientset, error) { 380 config, err := framework.LoadConfig() 381 if err != nil { 382 return nil, fmt.Errorf("failed to load config: %w", err) 383 } 384 client, err := clientset.NewForConfig(config) 385 if err != nil { 386 return nil, fmt.Errorf("failed to create client: %w", err) 387 } 388 return client, nil 389 } 390 391 // loadSystemSpecFromFile returns the system spec from the file with the 392 // filename. 393 func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) { 394 b, err := os.ReadFile(filename) 395 if err != nil { 396 return nil, err 397 } 398 data, err := utilyaml.ToJSON(b) 399 if err != nil { 400 return nil, err 401 } 402 spec := new(system.SysSpec) 403 if err := json.Unmarshal(data, spec); err != nil { 404 return nil, err 405 } 406 return spec, nil 407 } 408 409 // isNodeReady returns true if a node is ready; false otherwise. 410 func isNodeReady(node *v1.Node) bool { 411 for _, c := range node.Status.Conditions { 412 if c.Type == v1.NodeReady { 413 return c.Status == v1.ConditionTrue 414 } 415 } 416 return false 417 } 418 419 func setExtraEnvs() { 420 for name, value := range framework.TestContext.ExtraEnvs { 421 os.Setenv(name, value) 422 } 423 }