k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/e2e_node_suite_test.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2016 The Kubernetes Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 // To run tests in this suite 21 // NOTE: This test suite requires password-less sudo capabilities to run the kubelet and kube-apiserver. 22 package e2enode 23 24 import ( 25 "bytes" 26 "context" 27 "encoding/json" 28 "flag" 29 "fmt" 30 31 "os" 32 "os/exec" 33 "syscall" 34 "testing" 35 "time" 36 37 v1 "k8s.io/api/core/v1" 38 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 39 utilyaml "k8s.io/apimachinery/pkg/util/yaml" 40 utilfeature "k8s.io/apiserver/pkg/util/feature" 41 clientset "k8s.io/client-go/kubernetes" 42 cliflag "k8s.io/component-base/cli/flag" 43 "k8s.io/kubernetes/pkg/util/rlimit" 44 commontest "k8s.io/kubernetes/test/e2e/common" 45 "k8s.io/kubernetes/test/e2e/framework" 46 e2econfig "k8s.io/kubernetes/test/e2e/framework/config" 47 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 48 e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" 49 e2etestingmanifests "k8s.io/kubernetes/test/e2e/testing-manifests" 50 "k8s.io/kubernetes/test/e2e_node/services" 51 e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests" 52 system "k8s.io/system-validators/validators" 53 54 // define and freeze constants 55 _ "k8s.io/kubernetes/test/e2e/feature" 56 _ "k8s.io/kubernetes/test/e2e/nodefeature" 57 58 // reconfigure framework 59 _ "k8s.io/kubernetes/test/e2e/framework/debug/init" 60 _ "k8s.io/kubernetes/test/e2e/framework/metrics/init" 61 _ "k8s.io/kubernetes/test/e2e/framework/node/init" 62 _ "k8s.io/kubernetes/test/utils/format" 63 64 "github.com/onsi/ginkgo/v2" 65 "github.com/onsi/gomega" 66 "github.com/spf13/pflag" 67 "k8s.io/klog/v2" 68 ) 69 70 var ( 71 e2es *services.E2EServices 72 // featureGates is a map of feature names to bools that enable or disable alpha/experimental features. 73 featureGates map[string]bool 74 // serviceFeatureGates is a map of feature names to bools that enable or 75 // disable alpha/experimental features for API service. 76 serviceFeatureGates map[string]bool 77 78 // TODO(random-liu): Change the following modes to sub-command. 79 runServicesMode = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.") 80 runKubeletMode = flag.Bool("run-kubelet-mode", false, "If true, only start kubelet, and not run test.") 81 systemValidateMode = flag.Bool("system-validate-mode", false, "If true, only run system validation in current process, and not run test.") 82 systemSpecFile = flag.String("system-spec-file", "", "The name of the system spec file that will be used for node conformance test. If it's unspecified or empty, the default system spec (system.DefaultSysSpec) will be used.") 83 ) 84 85 // registerNodeFlags registers flags specific to the node e2e test suite. 86 func registerNodeFlags(flags *flag.FlagSet) { 87 // Mark the test as node e2e when node flags are api.Registry. 88 framework.TestContext.NodeE2E = true 89 flags.StringVar(&framework.TestContext.BearerToken, "bearer-token", "", "The bearer token to authenticate with. If not specified, it would be a random token. Currently this token is only used in node e2e tests.") 90 flags.StringVar(&framework.TestContext.NodeName, "node-name", "", "Name of the node to run tests on.") 91 flags.StringVar(&framework.TestContext.KubeletConfigDropinDir, "config-dir", "", "Path to a directory containing drop-in configurations for the kubelet.") 92 // TODO(random-liu): Move kubelet start logic out of the test. 93 // TODO(random-liu): Move log fetch logic out of the test. 94 // There are different ways to start kubelet (systemd, initd, docker, manually started etc.) 95 // and manage logs (journald, upstart etc.). 96 // For different situation we need to mount different things into the container, run different commands. 97 // It is hard and unnecessary to deal with the complexity inside the test suite. 98 flags.BoolVar(&framework.TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.") 99 flags.BoolVar(&framework.TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.") 100 flags.BoolVar(&framework.TestContext.RestartKubelet, "restart-kubelet", false, "If true, restart Kubelet unit when the process is killed.") 101 flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.") 102 flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.") 103 flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2") 104 flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.") 105 flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.") 106 flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.") 107 flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.") 108 flags.Var(cliflag.NewMapStringBool(&featureGates), "feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features.") 109 flags.Var(cliflag.NewMapStringBool(&serviceFeatureGates), "service-feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features for API service.") 110 flags.BoolVar(&framework.TestContext.StandaloneMode, "standalone-mode", false, "If true, starts kubelet in standalone mode.") 111 } 112 113 func init() { 114 // Enable embedded FS file lookup as fallback 115 e2etestfiles.AddFileSource(e2etestingmanifests.GetE2ETestingManifestsFS()) 116 e2etestfiles.AddFileSource(e2enodetestingmanifests.GetE2ENodeTestingManifestsFS()) 117 } 118 119 func TestMain(m *testing.M) { 120 // Copy go flags in TestMain, to ensure go test flags are registered (no longer available in init() as of go1.13) 121 e2econfig.CopyFlags(e2econfig.Flags, flag.CommandLine) 122 framework.RegisterCommonFlags(flag.CommandLine) 123 registerNodeFlags(flag.CommandLine) 124 pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 125 // Mark the run-services-mode flag as hidden to prevent user from using it. 126 pflag.CommandLine.MarkHidden("run-services-mode") 127 // It's weird that if I directly use pflag in TestContext, it will report error. 128 // It seems that someone is using flag.Parse() after init() and TestMain(). 129 // TODO(random-liu): Find who is using flag.Parse() and cause errors and move the following logic 130 // into TestContext. 131 // TODO(pohly): remove RegisterNodeFlags from test_context.go enable Viper config support here? 132 133 pflag.Parse() 134 if pflag.CommandLine.NArg() > 0 { 135 fmt.Fprintf(os.Stderr, "unknown additional command line arguments: %s", pflag.CommandLine.Args()) 136 os.Exit(1) 137 } 138 framework.AfterReadingAllFlags(&framework.TestContext) 139 if err := e2eskipper.InitFeatureGates(utilfeature.DefaultFeatureGate, featureGates); err != nil { 140 fmt.Fprintf(os.Stderr, "ERROR: initialize feature gates: %v", err) 141 os.Exit(1) 142 } 143 144 if err := services.SetFeatureGatesForInProcessComponents(serviceFeatureGates); err != nil { 145 fmt.Fprintf(os.Stderr, "ERROR: initialize process feature gates for API service: %v", err) 146 os.Exit(1) 147 } 148 149 setExtraEnvs() 150 os.Exit(m.Run()) 151 } 152 153 // When running the containerized conformance test, we'll mount the 154 // host root filesystem as readonly to /rootfs. 155 const rootfs = "/rootfs" 156 157 func TestE2eNode(t *testing.T) { 158 // Make sure we are not limited by sshd when it comes to open files 159 if err := rlimit.SetNumFiles(1000000); err != nil { 160 klog.Infof("failed to set rlimit on max file handles: %v", err) 161 } 162 163 if *runServicesMode { 164 // If run-services-mode is specified, only run services in current process. 165 services.RunE2EServices(t) 166 return 167 } 168 if *runKubeletMode { 169 // If run-kubelet-mode is specified, only start kubelet. 170 services.RunKubelet(featureGates) 171 return 172 } 173 if *systemValidateMode { 174 // If system-validate-mode is specified, only run system validation in current process. 175 spec := &system.DefaultSysSpec 176 if *systemSpecFile != "" { 177 var err error 178 spec, err = loadSystemSpecFromFile(*systemSpecFile) 179 if err != nil { 180 klog.Exitf("Failed to load system spec: %v", err) 181 } 182 } 183 if framework.TestContext.NodeConformance { 184 // Chroot to /rootfs to make system validation can check system 185 // as in the root filesystem. 186 // TODO(random-liu): Consider to chroot the whole test process to make writing 187 // test easier. 188 if err := syscall.Chroot(rootfs); err != nil { 189 klog.Exitf("chroot %q failed: %v", rootfs, err) 190 } 191 } 192 if _, err := system.ValidateSpec(*spec, "remote"); len(err) != 0 { 193 klog.Exitf("system validation failed: %v", err) 194 } 195 return 196 } 197 198 // We're not running in a special mode so lets run tests. 199 gomega.RegisterFailHandler(ginkgo.Fail) 200 // Initialize the KubeletConfigDropinDir again if the test doesn't run in run-kubelet-mode. 201 if framework.TestContext.KubeletConfigDropinDir == "" { 202 var err error 203 framework.TestContext.KubeletConfigDropinDir, err = services.KubeletConfigDirCWDDir() 204 if err != nil { 205 klog.Errorf("failed to create kubelet config directory: %v", err) 206 } 207 } 208 reportDir := framework.TestContext.ReportDir 209 if reportDir != "" { 210 // Create the directory if it doesn't already exist 211 // NOTE: junit report can be simply created by executing your tests with the new --junit-report flags instead. 212 if err := os.MkdirAll(reportDir, 0755); err != nil { 213 klog.Errorf("Failed creating report directory: %v", err) 214 } 215 } 216 suiteConfig, reporterConfig := framework.CreateGinkgoConfig() 217 ginkgo.RunSpecs(t, "E2eNode Suite", suiteConfig, reporterConfig) 218 } 219 220 // Setup the kubelet on the node 221 var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte { 222 // Run system validation test. 223 gomega.Expect(validateSystem()).To(gomega.Succeed(), "system validation") 224 225 // Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue 226 // This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling. 227 if framework.TestContext.PrepullImages { 228 klog.Infof("Pre-pulling images so that they are cached for the tests.") 229 updateImageAllowList(ctx) 230 err := PrePullAllImages() 231 gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) 232 } 233 234 // TODO(yifan): Temporary workaround to disable coreos from auto restart 235 // by masking the locksmithd. 236 // We should mask locksmithd when provisioning the machine. 237 maskLocksmithdOnCoreos() 238 239 if *startServices { 240 // If the services are expected to stop after test, they should monitor the test process. 241 // If the services are expected to keep running after test, they should not monitor the test process. 242 e2es = services.NewE2EServices(*stopServices) 243 gomega.Expect(e2es.Start(featureGates)).To(gomega.Succeed(), "should be able to start node services.") 244 } else { 245 klog.Infof("Running tests without starting services.") 246 } 247 248 if !framework.TestContext.StandaloneMode { 249 klog.Infof("Wait for the node to be ready") 250 waitForNodeReady(ctx) 251 } 252 253 // Reference common test to make the import valid. 254 commontest.CurrentSuite = commontest.NodeE2E 255 256 // ginkgo would spawn multiple processes to run tests. 257 // Since the bearer token is generated randomly at run time, 258 // we need to distribute the bearer token to other processes to make them use the same token. 259 return []byte(framework.TestContext.BearerToken) 260 }, func(ctx context.Context, token []byte) { 261 framework.TestContext.BearerToken = string(token) 262 // update test context with node configuration. 263 gomega.Expect(updateTestContext(ctx)).To(gomega.Succeed(), "update test context with node config.") 264 265 // Store current Kubelet configuration in the package variable 266 // This assumes all tests which dynamically change kubelet configuration 267 // must: 1) run in serial; 2) restore kubelet configuration after test. 268 var err error 269 kubeletCfg, err = getCurrentKubeletConfig(ctx) 270 framework.ExpectNoError(err) 271 }) 272 273 // Tear down the kubelet on the node 274 var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() { 275 if e2es != nil { 276 if *startServices && *stopServices { 277 klog.Infof("Stopping node services...") 278 e2es.Stop() 279 } 280 } 281 282 klog.Infof("Tests Finished") 283 }) 284 285 // validateSystem runs system validation in a separate process and returns error if validation fails. 286 func validateSystem() error { 287 testBin, err := os.Executable() 288 if err != nil { 289 return fmt.Errorf("can't get current binary: %w", err) 290 } 291 // Pass all flags into the child process, so that it will see the same flag set. 292 output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput() 293 // The output of system validation should have been formatted, directly print here. 294 fmt.Print(string(output)) 295 if err != nil { 296 return fmt.Errorf("system validation failed: %w", err) 297 } 298 return nil 299 } 300 301 func maskLocksmithdOnCoreos() { 302 data, err := os.ReadFile("/etc/os-release") 303 if err != nil { 304 // Not all distros contain this file. 305 klog.Infof("Could not read /etc/os-release: %v", err) 306 return 307 } 308 if bytes.Contains(data, []byte("ID=coreos")) { 309 output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput() 310 framework.ExpectNoError(err, fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output))) 311 klog.Infof("Locksmithd is masked successfully") 312 } 313 } 314 315 func waitForNodeReady(ctx context.Context) { 316 const ( 317 // nodeReadyTimeout is the time to wait for node to become ready. 318 nodeReadyTimeout = 2 * time.Minute 319 // nodeReadyPollInterval is the interval to check node ready. 320 nodeReadyPollInterval = 1 * time.Second 321 ) 322 client, err := getAPIServerClient() 323 framework.ExpectNoError(err, "should be able to get apiserver client.") 324 gomega.Eventually(ctx, func() error { 325 node, err := getNode(client) 326 if err != nil { 327 return fmt.Errorf("failed to get node: %w", err) 328 } 329 if !isNodeReady(node) { 330 return fmt.Errorf("node is not ready: %+v", node) 331 } 332 return nil 333 }, nodeReadyTimeout, nodeReadyPollInterval).Should(gomega.Succeed()) 334 } 335 336 // updateTestContext updates the test context with the node name. 337 func updateTestContext(ctx context.Context) error { 338 setExtraEnvs() 339 updateImageAllowList(ctx) 340 341 client, err := getAPIServerClient() 342 if err != nil { 343 return fmt.Errorf("failed to get apiserver client: %w", err) 344 } 345 346 if !framework.TestContext.StandaloneMode { 347 // Update test context with current node object. 348 node, err := getNode(client) 349 if err != nil { 350 return fmt.Errorf("failed to get node: %w", err) 351 } 352 framework.TestContext.NodeName = node.Name // Set node name from API server, it is already set to the computer name by default. 353 } 354 355 framework.Logf("Node name: %s", framework.TestContext.NodeName) 356 357 return nil 358 } 359 360 // getNode gets node object from the apiserver. 361 func getNode(c *clientset.Clientset) (*v1.Node, error) { 362 nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 363 framework.ExpectNoError(err, "should be able to list nodes.") 364 if nodes == nil { 365 return nil, fmt.Errorf("the node list is nil") 366 } 367 gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically("<=", 1), "the number of nodes is more than 1.") 368 if len(nodes.Items) == 0 { 369 return nil, fmt.Errorf("empty node list: %+v", nodes) 370 } 371 return &nodes.Items[0], nil 372 } 373 374 // getAPIServerClient gets a apiserver client. 375 func getAPIServerClient() (*clientset.Clientset, error) { 376 config, err := framework.LoadConfig() 377 if err != nil { 378 return nil, fmt.Errorf("failed to load config: %w", err) 379 } 380 client, err := clientset.NewForConfig(config) 381 if err != nil { 382 return nil, fmt.Errorf("failed to create client: %w", err) 383 } 384 return client, nil 385 } 386 387 // loadSystemSpecFromFile returns the system spec from the file with the 388 // filename. 389 func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) { 390 b, err := os.ReadFile(filename) 391 if err != nil { 392 return nil, err 393 } 394 data, err := utilyaml.ToJSON(b) 395 if err != nil { 396 return nil, err 397 } 398 spec := new(system.SysSpec) 399 if err := json.Unmarshal(data, spec); err != nil { 400 return nil, err 401 } 402 return spec, nil 403 } 404 405 // isNodeReady returns true if a node is ready; false otherwise. 406 func isNodeReady(node *v1.Node) bool { 407 for _, c := range node.Status.Conditions { 408 if c.Type == v1.NodeReady { 409 return c.Status == v1.ConditionTrue 410 } 411 } 412 return false 413 } 414 415 func setExtraEnvs() { 416 for name, value := range framework.TestContext.ExtraEnvs { 417 os.Setenv(name, value) 418 } 419 }