k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/e2e_node_suite_test.go

k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/e2e_node_suite_test.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2016 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  // To run tests in this suite
    21  // NOTE: This test suite requires password-less sudo capabilities to run the kubelet and kube-apiserver.
    22  package e2enode
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"encoding/json"
    28  	"flag"
    29  	"fmt"
    30  
    31  	"os"
    32  	"os/exec"
    33  	"syscall"
    34  	"testing"
    35  	"time"
    36  
    37  	v1 "k8s.io/api/core/v1"
    38  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    39  	utilyaml "k8s.io/apimachinery/pkg/util/yaml"
    40  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    41  	clientset "k8s.io/client-go/kubernetes"
    42  	cliflag "k8s.io/component-base/cli/flag"
    43  	"k8s.io/kubernetes/pkg/util/rlimit"
    44  	commontest "k8s.io/kubernetes/test/e2e/common"
    45  	"k8s.io/kubernetes/test/e2e/framework"
    46  	e2econfig "k8s.io/kubernetes/test/e2e/framework/config"
    47  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    48  	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
    49  	e2etestingmanifests "k8s.io/kubernetes/test/e2e/testing-manifests"
    50  	"k8s.io/kubernetes/test/e2e_node/services"
    51  	e2enodetestingmanifests "k8s.io/kubernetes/test/e2e_node/testing-manifests"
    52  	system "k8s.io/system-validators/validators"
    53  
    54  	// define and freeze constants
    55  	_ "k8s.io/kubernetes/test/e2e/feature"
    56  	_ "k8s.io/kubernetes/test/e2e/nodefeature"
    57  
    58  	// reconfigure framework
    59  	_ "k8s.io/kubernetes/test/e2e/framework/debug/init"
    60  	_ "k8s.io/kubernetes/test/e2e/framework/metrics/init"
    61  	_ "k8s.io/kubernetes/test/e2e/framework/node/init"
    62  	_ "k8s.io/kubernetes/test/utils/format"
    63  
    64  	"github.com/onsi/ginkgo/v2"
    65  	"github.com/onsi/gomega"
    66  	"github.com/spf13/pflag"
    67  	"k8s.io/klog/v2"
    68  )
    69  
var (
	// e2es is the handle to the node services started by this test process.
	// It is only assigned in the before-suite hook when services are started,
	// and is checked for nil in the after-suite hook before stopping them.
	e2es *services.E2EServices
	// featureGates is a map of feature names to bools that enable or disable alpha/experimental features.
	// It is populated from the --feature-gates flag.
	featureGates map[string]bool
	// serviceFeatureGates is a map of feature names to bools that enable or
	// disable alpha/experimental features for API service.
	// It is populated from the --service-feature-gates flag.
	serviceFeatureGates map[string]bool

	// The flags below select a special mode in which TestE2eNode performs a
	// single task and returns without running the test specs.
	// TODO(random-liu): Change the following modes to sub-command.
	runServicesMode    = flag.Bool("run-services-mode", false, "If true, only run services (etcd, apiserver) in current process, and not run test.")
	runKubeletMode     = flag.Bool("run-kubelet-mode", false, "If true, only start kubelet, and not run test.")
	systemValidateMode = flag.Bool("system-validate-mode", false, "If true, only run system validation in current process, and not run test.")
	// systemSpecFile is only consulted in system-validate-mode.
	systemSpecFile     = flag.String("system-spec-file", "", "The name of the system spec file that will be used for node conformance test. If it's unspecified or empty, the default system spec (system.DefaultSysSpec) will be used.")
)
    84  
    85  // registerNodeFlags registers flags specific to the node e2e test suite.
    86  func registerNodeFlags(flags *flag.FlagSet) {
    87  	// Mark the test as node e2e when node flags are api.Registry.
    88  	framework.TestContext.NodeE2E = true
    89  	flags.StringVar(&framework.TestContext.BearerToken, "bearer-token", "", "The bearer token to authenticate with. If not specified, it would be a random token. Currently this token is only used in node e2e tests.")
    90  	flags.StringVar(&framework.TestContext.NodeName, "node-name", "", "Name of the node to run tests on.")
    91  	flags.StringVar(&framework.TestContext.KubeletConfigDropinDir, "config-dir", "", "Path to a directory containing drop-in configurations for the kubelet.")
    92  	// TODO(random-liu): Move kubelet start logic out of the test.
    93  	// TODO(random-liu): Move log fetch logic out of the test.
    94  	// There are different ways to start kubelet (systemd, initd, docker, manually started etc.)
    95  	// and manage logs (journald, upstart etc.).
    96  	// For different situation we need to mount different things into the container, run different commands.
    97  	// It is hard and unnecessary to deal with the complexity inside the test suite.
    98  	flags.BoolVar(&framework.TestContext.NodeConformance, "conformance", false, "If true, the test suite will not start kubelet, and fetch system log (kernel, docker, kubelet log etc.) to the report directory.")
    99  	flags.BoolVar(&framework.TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
   100  	flags.BoolVar(&framework.TestContext.RestartKubelet, "restart-kubelet", false, "If true, restart Kubelet unit when the process is killed.")
   101  	flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
   102  	flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
   103  	flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
   104  	flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.")
   105  	flag.StringVar(&framework.TestContext.ClusterDNSDomain, "dns-domain", "", "The DNS Domain of the cluster.")
   106  	flag.Var(cliflag.NewMapStringString(&framework.TestContext.RuntimeConfig), "runtime-config", "The runtime configuration used on node e2e tests.")
   107  	flags.BoolVar(&framework.TestContext.RequireDevices, "require-devices", false, "If true, require device plugins to be installed in the running environment.")
   108  	flags.Var(cliflag.NewMapStringBool(&featureGates), "feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features.")
   109  	flags.Var(cliflag.NewMapStringBool(&serviceFeatureGates), "service-feature-gates", "A set of key=value pairs that describe feature gates for alpha/experimental features for API service.")
   110  	flags.BoolVar(&framework.TestContext.StandaloneMode, "standalone-mode", false, "If true, starts kubelet in standalone mode.")
   111  }
   112  
   113  func init() {
   114  	// Enable embedded FS file lookup as fallback
   115  	e2etestfiles.AddFileSource(e2etestingmanifests.GetE2ETestingManifestsFS())
   116  	e2etestfiles.AddFileSource(e2enodetestingmanifests.GetE2ENodeTestingManifestsFS())
   117  }
   118  
   119  func TestMain(m *testing.M) {
   120  	// Copy go flags in TestMain, to ensure go test flags are registered (no longer available in init() as of go1.13)
   121  	e2econfig.CopyFlags(e2econfig.Flags, flag.CommandLine)
   122  	framework.RegisterCommonFlags(flag.CommandLine)
   123  	registerNodeFlags(flag.CommandLine)
   124  	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
   125  	// Mark the run-services-mode flag as hidden to prevent user from using it.
   126  	pflag.CommandLine.MarkHidden("run-services-mode")
   127  	// It's weird that if I directly use pflag in TestContext, it will report error.
   128  	// It seems that someone is using flag.Parse() after init() and TestMain().
   129  	// TODO(random-liu): Find who is using flag.Parse() and cause errors and move the following logic
   130  	// into TestContext.
   131  	// TODO(pohly): remove RegisterNodeFlags from test_context.go enable Viper config support here?
   132  
   133  	pflag.Parse()
   134  	if pflag.CommandLine.NArg() > 0 {
   135  		fmt.Fprintf(os.Stderr, "unknown additional command line arguments: %s", pflag.CommandLine.Args())
   136  		os.Exit(1)
   137  	}
   138  	framework.AfterReadingAllFlags(&framework.TestContext)
   139  	if err := e2eskipper.InitFeatureGates(utilfeature.DefaultFeatureGate, featureGates); err != nil {
   140  		fmt.Fprintf(os.Stderr, "ERROR: initialize feature gates: %v", err)
   141  		os.Exit(1)
   142  	}
   143  
   144  	if err := services.SetFeatureGatesForInProcessComponents(serviceFeatureGates); err != nil {
   145  		fmt.Fprintf(os.Stderr, "ERROR: initialize process feature gates for API service: %v", err)
   146  		os.Exit(1)
   147  	}
   148  
   149  	setExtraEnvs()
   150  	os.Exit(m.Run())
   151  }
   152  
// rootfs is the path at which the host root filesystem is mounted (readonly)
// when running the containerized conformance test. System validation chroots
// into it when the conformance flag is set.
const rootfs = "/rootfs"
   156  
   157  func TestE2eNode(t *testing.T) {
   158  	// Make sure we are not limited by sshd when it comes to open files
   159  	if err := rlimit.SetNumFiles(1000000); err != nil {
   160  		klog.Infof("failed to set rlimit on max file handles: %v", err)
   161  	}
   162  
   163  	if *runServicesMode {
   164  		// If run-services-mode is specified, only run services in current process.
   165  		services.RunE2EServices(t)
   166  		return
   167  	}
   168  	if *runKubeletMode {
   169  		// If run-kubelet-mode is specified, only start kubelet.
   170  		services.RunKubelet(featureGates)
   171  		return
   172  	}
   173  	if *systemValidateMode {
   174  		// If system-validate-mode is specified, only run system validation in current process.
   175  		spec := &system.DefaultSysSpec
   176  		if *systemSpecFile != "" {
   177  			var err error
   178  			spec, err = loadSystemSpecFromFile(*systemSpecFile)
   179  			if err != nil {
   180  				klog.Exitf("Failed to load system spec: %v", err)
   181  			}
   182  		}
   183  		if framework.TestContext.NodeConformance {
   184  			// Chroot to /rootfs to make system validation can check system
   185  			// as in the root filesystem.
   186  			// TODO(random-liu): Consider to chroot the whole test process to make writing
   187  			// test easier.
   188  			if err := syscall.Chroot(rootfs); err != nil {
   189  				klog.Exitf("chroot %q failed: %v", rootfs, err)
   190  			}
   191  		}
   192  		if _, err := system.ValidateSpec(*spec, "remote"); len(err) != 0 {
   193  			klog.Exitf("system validation failed: %v", err)
   194  		}
   195  		return
   196  	}
   197  
   198  	// We're not running in a special mode so lets run tests.
   199  	gomega.RegisterFailHandler(ginkgo.Fail)
   200  	// Initialize the KubeletConfigDropinDir again if the test doesn't run in run-kubelet-mode.
   201  	if framework.TestContext.KubeletConfigDropinDir == "" {
   202  		var err error
   203  		framework.TestContext.KubeletConfigDropinDir, err = services.KubeletConfigDirCWDDir()
   204  		if err != nil {
   205  			klog.Errorf("failed to create kubelet config directory: %v", err)
   206  		}
   207  	}
   208  	reportDir := framework.TestContext.ReportDir
   209  	if reportDir != "" {
   210  		// Create the directory if it doesn't already exist
   211  		// NOTE: junit report can be simply created by executing your tests with the new --junit-report flags instead.
   212  		if err := os.MkdirAll(reportDir, 0755); err != nil {
   213  			klog.Errorf("Failed creating report directory: %v", err)
   214  		}
   215  	}
   216  	suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
   217  	ginkgo.RunSpecs(t, "E2eNode Suite", suiteConfig, reporterConfig)
   218  }
   219  
   220  // Setup the kubelet on the node
   221  var _ = ginkgo.SynchronizedBeforeSuite(func(ctx context.Context) []byte {
   222  	// Run system validation test.
   223  	gomega.Expect(validateSystem()).To(gomega.Succeed(), "system validation")
   224  
   225  	// Pre-pull the images tests depend on so we can fail immediately if there is an image pull issue
   226  	// This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling.
   227  	if framework.TestContext.PrepullImages {
   228  		klog.Infof("Pre-pulling images so that they are cached for the tests.")
   229  		updateImageAllowList(ctx)
   230  		err := PrePullAllImages()
   231  		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
   232  	}
   233  
   234  	// TODO(yifan): Temporary workaround to disable coreos from auto restart
   235  	// by masking the locksmithd.
   236  	// We should mask locksmithd when provisioning the machine.
   237  	maskLocksmithdOnCoreos()
   238  
   239  	if *startServices {
   240  		// If the services are expected to stop after test, they should monitor the test process.
   241  		// If the services are expected to keep running after test, they should not monitor the test process.
   242  		e2es = services.NewE2EServices(*stopServices)
   243  		gomega.Expect(e2es.Start(featureGates)).To(gomega.Succeed(), "should be able to start node services.")
   244  	} else {
   245  		klog.Infof("Running tests without starting services.")
   246  	}
   247  
   248  	if !framework.TestContext.StandaloneMode {
   249  		klog.Infof("Wait for the node to be ready")
   250  		waitForNodeReady(ctx)
   251  	}
   252  
   253  	// Reference common test to make the import valid.
   254  	commontest.CurrentSuite = commontest.NodeE2E
   255  
   256  	// ginkgo would spawn multiple processes to run tests.
   257  	// Since the bearer token is generated randomly at run time,
   258  	// we need to distribute the bearer token to other processes to make them use the same token.
   259  	return []byte(framework.TestContext.BearerToken)
   260  }, func(ctx context.Context, token []byte) {
   261  	framework.TestContext.BearerToken = string(token)
   262  	// update test context with node configuration.
   263  	gomega.Expect(updateTestContext(ctx)).To(gomega.Succeed(), "update test context with node config.")
   264  
   265  	// Store current Kubelet configuration in the package variable
   266  	// This assumes all tests which dynamically change kubelet configuration
   267  	// must: 1) run in serial; 2) restore kubelet configuration after test.
   268  	var err error
   269  	kubeletCfg, err = getCurrentKubeletConfig(ctx)
   270  	framework.ExpectNoError(err)
   271  })
   272  
   273  // Tear down the kubelet on the node
   274  var _ = ginkgo.SynchronizedAfterSuite(func() {}, func() {
   275  	if e2es != nil {
   276  		if *startServices && *stopServices {
   277  			klog.Infof("Stopping node services...")
   278  			e2es.Stop()
   279  		}
   280  	}
   281  
   282  	klog.Infof("Tests Finished")
   283  })
   284  
   285  // validateSystem runs system validation in a separate process and returns error if validation fails.
   286  func validateSystem() error {
   287  	testBin, err := os.Executable()
   288  	if err != nil {
   289  		return fmt.Errorf("can't get current binary: %w", err)
   290  	}
   291  	// Pass all flags into the child process, so that it will see the same flag set.
   292  	output, err := exec.Command(testBin, append([]string{"--system-validate-mode"}, os.Args[1:]...)...).CombinedOutput()
   293  	// The output of system validation should have been formatted, directly print here.
   294  	fmt.Print(string(output))
   295  	if err != nil {
   296  		return fmt.Errorf("system validation failed: %w", err)
   297  	}
   298  	return nil
   299  }
   300  
   301  func maskLocksmithdOnCoreos() {
   302  	data, err := os.ReadFile("/etc/os-release")
   303  	if err != nil {
   304  		// Not all distros contain this file.
   305  		klog.Infof("Could not read /etc/os-release: %v", err)
   306  		return
   307  	}
   308  	if bytes.Contains(data, []byte("ID=coreos")) {
   309  		output, err := exec.Command("systemctl", "mask", "--now", "locksmithd").CombinedOutput()
   310  		framework.ExpectNoError(err, fmt.Sprintf("should be able to mask locksmithd - output: %q", string(output)))
   311  		klog.Infof("Locksmithd is masked successfully")
   312  	}
   313  }
   314  
   315  func waitForNodeReady(ctx context.Context) {
   316  	const (
   317  		// nodeReadyTimeout is the time to wait for node to become ready.
   318  		nodeReadyTimeout = 2 * time.Minute
   319  		// nodeReadyPollInterval is the interval to check node ready.
   320  		nodeReadyPollInterval = 1 * time.Second
   321  	)
   322  	client, err := getAPIServerClient()
   323  	framework.ExpectNoError(err, "should be able to get apiserver client.")
   324  	gomega.Eventually(ctx, func() error {
   325  		node, err := getNode(client)
   326  		if err != nil {
   327  			return fmt.Errorf("failed to get node: %w", err)
   328  		}
   329  		if !isNodeReady(node) {
   330  			return fmt.Errorf("node is not ready: %+v", node)
   331  		}
   332  		return nil
   333  	}, nodeReadyTimeout, nodeReadyPollInterval).Should(gomega.Succeed())
   334  }
   335  
   336  // updateTestContext updates the test context with the node name.
   337  func updateTestContext(ctx context.Context) error {
   338  	setExtraEnvs()
   339  	updateImageAllowList(ctx)
   340  
   341  	client, err := getAPIServerClient()
   342  	if err != nil {
   343  		return fmt.Errorf("failed to get apiserver client: %w", err)
   344  	}
   345  
   346  	if !framework.TestContext.StandaloneMode {
   347  		// Update test context with current node object.
   348  		node, err := getNode(client)
   349  		if err != nil {
   350  			return fmt.Errorf("failed to get node: %w", err)
   351  		}
   352  		framework.TestContext.NodeName = node.Name // Set node name from API server, it is already set to the computer name by default.
   353  	}
   354  
   355  	framework.Logf("Node name: %s", framework.TestContext.NodeName)
   356  
   357  	return nil
   358  }
   359  
   360  // getNode gets node object from the apiserver.
   361  func getNode(c *clientset.Clientset) (*v1.Node, error) {
   362  	nodes, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
   363  	framework.ExpectNoError(err, "should be able to list nodes.")
   364  	if nodes == nil {
   365  		return nil, fmt.Errorf("the node list is nil")
   366  	}
   367  	gomega.Expect(len(nodes.Items)).To(gomega.BeNumerically("<=", 1), "the number of nodes is more than 1.")
   368  	if len(nodes.Items) == 0 {
   369  		return nil, fmt.Errorf("empty node list: %+v", nodes)
   370  	}
   371  	return &nodes.Items[0], nil
   372  }
   373  
   374  // getAPIServerClient gets a apiserver client.
   375  func getAPIServerClient() (*clientset.Clientset, error) {
   376  	config, err := framework.LoadConfig()
   377  	if err != nil {
   378  		return nil, fmt.Errorf("failed to load config: %w", err)
   379  	}
   380  	client, err := clientset.NewForConfig(config)
   381  	if err != nil {
   382  		return nil, fmt.Errorf("failed to create client: %w", err)
   383  	}
   384  	return client, nil
   385  }
   386  
   387  // loadSystemSpecFromFile returns the system spec from the file with the
   388  // filename.
   389  func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) {
   390  	b, err := os.ReadFile(filename)
   391  	if err != nil {
   392  		return nil, err
   393  	}
   394  	data, err := utilyaml.ToJSON(b)
   395  	if err != nil {
   396  		return nil, err
   397  	}
   398  	spec := new(system.SysSpec)
   399  	if err := json.Unmarshal(data, spec); err != nil {
   400  		return nil, err
   401  	}
   402  	return spec, nil
   403  }
   404  
   405  // isNodeReady returns true if a node is ready; false otherwise.
   406  func isNodeReady(node *v1.Node) bool {
   407  	for _, c := range node.Status.Conditions {
   408  		if c.Type == v1.NodeReady {
   409  			return c.Status == v1.ConditionTrue
   410  		}
   411  	}
   412  	return false
   413  }
   414  
   415  func setExtraEnvs() {
   416  	for name, value := range framework.TestContext.ExtraEnvs {
   417  		os.Setenv(name, value)
   418  	}
   419  }