k8s.io/kubernetes@v1.29.3/test/e2e_node/e2e_node_suite_test.go

k8s.io/kubernetes@v1.29.3/test/e2e_node/e2e_node_suite_test.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2016 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
// NOTE: Running the tests in this suite requires password-less sudo capabilities to run the kubelet and kube-apiserver.
    22  package e2enode
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"encoding/json"
    28  	"flag"
    29  	"fmt"
    30  
    31  	"math/rand"
    32  	"os"
    33  	"os/exec"
    34  	"syscall"
    35  	"testing"
    36  	"time"
    37  
    38  	v1 "k8s.io/api/core/v1"
    39  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    40  	utilyaml "k8s.io/apimachinery/pkg/util/yaml"
    41  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    42  	clientset "k8s.io/client-go/kubernetes"
    43  	cliflag "k8s.io/component-base/cli/flag"
    44  	"k8s.io/component-base/logs"
    45  	"k8s.io/kubernetes/pkg/util/rlimit"
    46  	commontest "k8s.io/kubernetes/test/e2e/common"
    47  	"k8s.io/kubernetes/test/e2e/framework"
    48  	e2econfig "k8s.io/kubernetes/test/e2e/framework/config"
    49  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    50  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    51  	e2etestingmanifests "k8s.io/kubernetes/test/e2e/testing-manifests"
    52  	"k8s.io/kubernetes/test/e2e_node/services"
    53  	e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests"
    54  	system "k8s.io/system-validators/validators"
    55  
    56  	// define and freeze constants
    57  	_ "k8s.io/kubernetes/test/e2e/feature"
    58  	_ "k8s.io/kubernetes/test/e2e/nodefeature"
    59  
    60  	// reconfigure framework
    61  	_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
    62  	_ "k8s.io/kubernetes/test/e2e/framework/metrics/init"
    63  	_ "k8s.io/kubernetes/test/e2e/framework/node/init"
    64  	_ "k8s.io/kubernetes/test/utils/format"
    65  
    66  	"github.com/onsi/ginkgo/v2"
    67  	"github.com/onsi/gomega"
    68  	"github.com/spf13/pflag"
    69  	"k8s.io/klog/v2"
    70  )
    71  
    72  var (
    73  	e2es *services.E2EServices
    74  	// featureGates is a map of feature names to bools that enable or disable alpha/experimental features.
    75  	featureGates map[string]bool
    76  	// serviceFeatureGates is a map of feature names to bools that enable or
    77  	// disable alpha/experimental features for API service.
    78  	serviceFeatureGates map[string]bool
    79  
    80  	// TODO(random-liu): Change the following modes to sub-command.
    81  	runServicesMode    = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.")
    82  	runKubeletMode     = flag.Bool("run-kubelet-mode", false, "If true, only start kubelet, and not run test.")
    83  	systemValidateMode = flag.Bool("system-validate-mode", false, "If true, only run system validation in current process, and not run test.")
    84  	systemSpecFile     = flag.String("system-spec-file", "", "The name of the system spec file that will be used for node conformance test. If it's unspecified or empty, the default system spec (system.DefaultSysSpec) will be used.")
    85  )
    86  
    87  // registerNodeFlags registers flags specific to the node e2e test suite.
    88  func registerNodeFlags(flags *flag.FlagSet) {
    89  	// Mark the test as node e2e when node flags are api.Registry.
    90  	framework.TestContext.NodeE2E = true
    91  	flags.StringVar(&framework.TestContext.BearerToken, "bearer-token", "", "The bearer token to authenticate with. If not specified, it would be a random token. Currently this token is only used in node e2e tests.")
    92  	flags.StringVar(&framework.TestContext.NodeName, "node-name", "", "Name of the node to run tests on.")
    93  	flags.StringVar(&framework.TestContext.KubeletConfigDropinDir, "config-dir", "", "Path to a directory containing drop-in configurations for the kubelet.")
    94  	// TODO(random-liu): Move kubelet start logic out of the test.
    95  	// TODO(random-liu): Move log fetch logic out of the test.
    96  	// There are different ways to start kubelet (systemd, initd, docker, manually started etc.)
    97  	// and manage logs (journald, upstart etc.).
    98  	// For different situation we need to mount different things into the container, run different commands.
    99  	// It is hard and unnecessary to deal with the complexity inside the test suite.
   100  	flags.BoolVar(&framework.TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.")
   101  	flags.BoolVar(&framework.TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
   102  	flags.BoolVar(&framework.TestContext.RestartKubelet, "restart-kubelet", false, "If true, restart Kubelet unit when the process is killed.")
   103  	flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
   104  	flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
   105  	flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
   106  	flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
   107  	flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.")
   108  	flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.")
   109  	flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.")
   110  	flags.Var(cliflag.NewMapStringBool(&featureGates), "feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features.")
   111  	flags.Var(cliflag.NewMapStringBool(&serviceFeatureGates), "service-feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features for API service.")
   112  	flags.BoolVar(&framework.TestContext.StandaloneMode, "standalone-mode", false, "If true, starts kubelet in standalone mode.")
   113  }
   114  
   115  func init() {
   116  	// Enable embedded FS file lookup as fallback
   117  	e2etestfiles.AddFileSource(e2etestingmanifests.GetE2ETestingManifestsFS())
   118  	e2etestfiles.AddFileSource(e2enodetestingmanifests.GetE2ENodeTestingManifestsFS())
   119  }
   120  
   121  func TestMain(m *testing.M) {
   122  	// Copy go flags in TestMain, to ensure go test flags are registered (no longer available in init() as of go1.13)
   123  	e2econfig.CopyFlags(e2econfig.Flags, flag.CommandLine)
   124  	framework.RegisterCommonFlags(flag.CommandLine)
   125  	registerNodeFlags(flag.CommandLine)
   126  	logs.AddFlags(pflag.CommandLine)
   127  	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
   128  	// Mark the run-services-mode flag as hidden to prevent user from using it.
   129  	pflag.CommandLine.MarkHidden("run-services-mode")
   130  	// It's weird that if I directly use pflag in TestContext, it will report error.
   131  	// It seems that someone is using flag.Parse() after init() and TestMain().
   132  	// TODO(random-liu): Find who is using flag.Parse() and cause errors and move the following logic
   133  	// into TestContext.
   134  	// TODO(pohly): remove RegisterNodeFlags from test_context.go enable Viper config support here?
   135  
   136  	rand.Seed(time.Now().UnixNano())
   137  	pflag.Parse()
   138  	if pflag.CommandLine.NArg() > 0 {
   139  		fmt.Fprintf(os.Stderr, "unknown additional command line arguments: %s", pflag.CommandLine.Args())
   140  		os.Exit(1)
   141  	}
   142  	framework.AfterReadingAllFlags(&framework.TestContext)
   143  	if err := e2eskipper.InitFeatureGates(utilfeature.DefaultFeatureGate, featureGates); err != nil {
   144  		fmt.Fprintf(os.Stderr, "ERROR: initialize feature gates: %v", err)
   145  		os.Exit(1)
   146  	}
   147  
   148  	if err := services.SetFeatureGatesForInProcessComponents(serviceFeatureGates); err != nil {
   149  		fmt.Fprintf(os.Stderr, "ERROR: initialize process feature gates for API service: %v", err)
   150  		os.Exit(1)
   151  	}
   152  
   153  	setExtraEnvs()
   154  	os.Exit(m.Run())
   155  }
   156  
   157  // When running the containerized conformance test, we'll mount the
   158  // host root filesystem as readonly to /rootfs.
   159  const rootfs = "/rootfs"
   160  
   161  func TestE2eNode(t *testing.T) {
   162  	// Make sure we are not limited by sshd when it comes to open files
   163  	if err := rlimit.SetNumFiles(1000000); err != nil {
   164  		klog.Infof("failed to set rlimit on max file handles: %v", err)
   165  	}
   166  
   167  	if *runServicesMode {
   168  		// If run-services-mode is specified, only run services in current process.
   169  		services.RunE2EServices(t)
   170  		return
   171  	}
   172  	if *runKubeletMode {
   173  		// If run-kubelet-mode is specified, only start kubelet.
   174  		services.RunKubelet(featureGates)
   175  		return
   176  	}
   177  	if *systemValidateMode {
   178  		// If system-validate-mode is specified, only run system validation in current process.
   179  		spec := &system.DefaultSysSpec
   180  		if *systemSpecFile != "" {
   181  			var err error
   182  			spec, err = loadSystemSpecFromFile(*systemSpecFile)
   183  			if err != nil {
   184  				klog.Exitf("Failed to load system spec: %v", err)
   185  			}
   186  		}
   187  		if framework.TestContext.NodeConformance {
   188  			// Chroot to /rootfs to make system validation can check system
   189  			// as in the root filesystem.
   190  			// TODO(random-liu): Consider to chroot the whole test process to make writing
   191  			// test easier.
   192  			if err := syscall.Chroot(rootfs); err != nil {
   193  				klog.Exitf("chroot %q failed: %v", rootfs, err)
   194  			}
   195  		}
   196  		if _, err := system.ValidateSpec(*spec, "remote"); len(err) != 0 {
   197  			klog.Exitf("system validation failed: %v", err)
   198  		}
   199  		return
   200  	}
   201  
   202  	// We're not running in a special mode so lets run tests.
   203  	gomega.RegisterFailHandler(ginkgo.Fail)
   204  	// Initialize the KubeletConfigDropinDir again if the test doesn't run in run-kubelet-mode.
   205  	if framework.TestContext.KubeletConfigDropinDir == "" {
   206  		var err error
   207  		framework.TestContext.KubeletConfigDropinDir, err = services.KubeletConfigDirCWDDir()
   208  		if err != nil {
   209  			klog.Errorf("failed to create kubelet config directory: %v", err)
   210  		}
   211  	}
   212  	reportDir := framework.TestContext.ReportDir
   213  	if reportDir != "" {
   214  		// Create the directory if it doesn't already exist
   215  		// NOTE: junit report can be simply created by executing your tests with the new --junit-report flags instead.
   216  		if err := os.MkdirAll(reportDir, 0755); err != nil {
   217  			klog.Errorf("Failed creating report directory: %v", err)
   218  		}
   219  	}
   220  	suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
   221  	ginkgo.RunSpecs(t, "E2eNode Suite", suiteConfig, reporterConfig)
   222  }
   223  
   224  // Setup the kubelet on the node
   225  var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte {
   226  	// Run system validation test.
   227  	gomega.Expect(validateSystem()).To(gomega.Succeed(), "system validation")
   228  
   229  	// Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue
   230  	// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
   231  	if framework.TestContext.PrepullImages {
   232  		klog.Infof("Pre-pulling images so that they are cached for the tests.")
   233  		updateImageAllowList(ctx)
   234  		err := PrePullAllImages()
   235  		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
   236  	}
   237  
   238  	// TODO(yifan): Temporary workaround to disable coreos from auto restart
   239  	// by masking the locksmithd.
   240  	// We should mask locksmithd when provisioning the machine.
   241  	maskLocksmithdOnCoreos()
   242  
   243  	if *startServices {
   244  		// If the services are expected to stop after test, they should monitor the test process.
   245  		// If the services are expected to keep running after test, they should not monitor the test process.
   246  		e2es = services.NewE2EServices(*stopServices)
   247  		gomega.Expect(e2es.Start(featureGates)).To(gomega.Succeed(), "should be able to start node services.")
   248  	} else {
   249  		klog.Infof("Running tests without starting services.")
   250  	}
   251  
   252  	if !framework.TestContext.StandaloneMode {
   253  		klog.Infof("Wait for the node to be ready")
   254  		waitForNodeReady(ctx)
   255  	}
   256  
   257  	// Reference common test to make the import valid.
   258  	commontest.CurrentSuite = commontest.NodeE2E
   259  
   260  	// ginkgo would spawn multiple processes to run tests.
   261  	// Since the bearer token is generated randomly at run time,
   262  	// we need to distribute the bearer token to other processes to make them use the same token.
   263  	return []byte(framework.TestContext.BearerToken)
   264  }, func(ctx context.Context, token []byte) {
   265  	framework.TestContext.BearerToken = string(token)
   266  	// update test context with node configuration.
   267  	gomega.Expect(updateTestContext(ctx)).To(gomega.Succeed(), "update test context with node config.")
   268  
   269  	// Store current Kubelet configuration in the package variable
   270  	// This assumes all tests which dynamically change kubelet configuration
   271  	// must: 1) run in serial; 2) restore kubelet configuration after test.
   272  	var err error
   273  	kubeletCfg, err = getCurrentKubeletConfig(ctx)
   274  	framework.ExpectNoError(err)
   275  })
   276  
   277  // Tear down the kubelet on the node
   278  var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() {
   279  	if e2es != nil {
   280  		if *startServices && *stopServices {
   281  			klog.Infof("Stopping node services...")
   282  			e2es.Stop()
   283  		}
   284  	}
   285  
   286  	klog.Infof("Tests Finished")
   287  })
   288  
   289  // validateSystem runs system validation in a separate process and returns error if validation fails.
   290  func validateSystem() error {
   291  	testBin, err := os.Executable()
   292  	if err != nil {
   293  		return fmt.Errorf("can't get current binary: %w", err)
   294  	}
   295  	// Pass all flags into the child process, so that it will see the same flag set.
   296  	output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput()
   297  	// The output of system validation should have been formatted, directly print here.
   298  	fmt.Print(string(output))
   299  	if err != nil {
   300  		return fmt.Errorf("system validation failed: %w", err)
   301  	}
   302  	return nil
   303  }
   304  
   305  func maskLocksmithdOnCoreos() {
   306  	data, err := os.ReadFile("/etc/os-release")
   307  	if err != nil {
   308  		// Not all distros contain this file.
   309  		klog.Infof("Could not read /etc/os-release: %v", err)
   310  		return
   311  	}
   312  	if bytes.Contains(data, []byte("ID=coreos")) {
   313  		output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput()
   314  		framework.ExpectNoError(err, fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output)))
   315  		klog.Infof("Locksmithd is masked successfully")
   316  	}
   317  }
   318  
   319  func waitForNodeReady(ctx context.Context) {
   320  	const (
   321  		// nodeReadyTimeout is the time to wait for node to become ready.
   322  		nodeReadyTimeout = 2 * time.Minute
   323  		// nodeReadyPollInterval is the interval to check node ready.
   324  		nodeReadyPollInterval = 1 * time.Second
   325  	)
   326  	client, err := getAPIServerClient()
   327  	framework.ExpectNoError(err, "should be able to get apiserver client.")
   328  	gomega.Eventually(ctx, func() error {
   329  		node, err := getNode(client)
   330  		if err != nil {
   331  			return fmt.Errorf("failed to get node: %w", err)
   332  		}
   333  		if !isNodeReady(node) {
   334  			return fmt.Errorf("node is not ready: %+v", node)
   335  		}
   336  		return nil
   337  	}, nodeReadyTimeout, nodeReadyPollInterval).Should(gomega.Succeed())
   338  }
   339  
   340  // updateTestContext updates the test context with the node name.
   341  func updateTestContext(ctx context.Context) error {
   342  	setExtraEnvs()
   343  	updateImageAllowList(ctx)
   344  
   345  	client, err := getAPIServerClient()
   346  	if err != nil {
   347  		return fmt.Errorf("failed to get apiserver client: %w", err)
   348  	}
   349  
   350  	if !framework.TestContext.StandaloneMode {
   351  		// Update test context with current node object.
   352  		node, err := getNode(client)
   353  		if err != nil {
   354  			return fmt.Errorf("failed to get node: %w", err)
   355  		}
   356  		framework.TestContext.NodeName = node.Name // Set node name from API server, it is already set to the computer name by default.
   357  	}
   358  
   359  	framework.Logf("Node name: %s", framework.TestContext.NodeName)
   360  
   361  	return nil
   362  }
   363  
   364  // getNode gets node object from the apiserver.
   365  func getNode(c *clientset.Clientset) (*v1.Node, error) {
   366  	nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
   367  	framework.ExpectNoError(err, "should be able to list nodes.")
   368  	if nodes == nil {
   369  		return nil, fmt.Errorf("the node list is nil")
   370  	}
   371  	gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically("<=", 1), "the number of nodes is more than 1.")
   372  	if len(nodes.Items) == 0 {
   373  		return nil, fmt.Errorf("empty node list: %+v", nodes)
   374  	}
   375  	return &nodes.Items[0], nil
   376  }
   377  
   378  // getAPIServerClient gets a apiserver client.
   379  func getAPIServerClient() (*clientset.Clientset, error) {
   380  	config, err := framework.LoadConfig()
   381  	if err != nil {
   382  		return nil, fmt.Errorf("failed to load config: %w", err)
   383  	}
   384  	client, err := clientset.NewForConfig(config)
   385  	if err != nil {
   386  		return nil, fmt.Errorf("failed to create client: %w", err)
   387  	}
   388  	return client, nil
   389  }
   390  
   391  // loadSystemSpecFromFile returns the system spec from the file with the
   392  // filename.
   393  func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) {
   394  	b, err := os.ReadFile(filename)
   395  	if err != nil {
   396  		return nil, err
   397  	}
   398  	data, err := utilyaml.ToJSON(b)
   399  	if err != nil {
   400  		return nil, err
   401  	}
   402  	spec := new(system.SysSpec)
   403  	if err := json.Unmarshal(data, spec); err != nil {
   404  		return nil, err
   405  	}
   406  	return spec, nil
   407  }
   408  
   409  // isNodeReady returns true if a node is ready; false otherwise.
   410  func isNodeReady(node *v1.Node) bool {
   411  	for _, c := range node.Status.Conditions {
   412  		if c.Type == v1.NodeReady {
   413  			return c.Status == v1.ConditionTrue
   414  		}
   415  	}
   416  	return false
   417  }
   418  
   419  func setExtraEnvs() {
   420  	for name, value := range framework.TestContext.ExtraEnvs {
   421  		os.Setenv(name, value)
   422  	}
   423  }