k8s.io/kubernetes@v1.29.3/test/e2e/framework/test_context.go

k8s.io/kubernetes@v1.29.3/test/e2e/framework/test_context.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package framework
    18  
    19  import (
    20  	"context"
    21  	"crypto/rand"
    22  	"encoding/base64"
    23  	"errors"
    24  	"flag"
    25  	"fmt"
    26  	"io"
    27  	"math"
    28  	"os"
    29  	"path"
    30  	"path/filepath"
    31  	"sort"
    32  	"strings"
    33  	"time"
    34  
    35  	"github.com/onsi/ginkgo/v2"
    36  	"github.com/onsi/ginkgo/v2/reporters"
    37  	"github.com/onsi/ginkgo/v2/types"
    38  	"github.com/onsi/gomega"
    39  	gomegaformat "github.com/onsi/gomega/format"
    40  
    41  	"k8s.io/apimachinery/pkg/util/sets"
    42  	restclient "k8s.io/client-go/rest"
    43  	"k8s.io/client-go/tools/clientcmd"
    44  	cliflag "k8s.io/component-base/cli/flag"
    45  	"k8s.io/klog/v2"
    46  
    47  	"k8s.io/kubernetes/test/e2e/framework/internal/junit"
    48  	"k8s.io/kubernetes/test/utils/image"
    49  	"k8s.io/kubernetes/test/utils/kubeconfig"
    50  )
    51  
const (
	// defaultHost is the API server address that the --host flag help
	// names as the fallback when neither --host nor --kubeconfig is set.
	defaultHost = "https://127.0.0.1:6443"

	// DefaultNumNodes is the default value of the --num-nodes flag. While
	// the flag keeps this value, the number of nodes is auto-detected.
	DefaultNumNodes = -1
)
    58  
var (
	// Output is used for output when not running tests, for example in -list-tests.
	// Test output should go to ginkgo.GinkgoWriter instead.
	Output io.Writer = os.Stdout

	// Exit is called when the framework detects fatal errors or when
	// it is done with the execution of e.g. -list-tests. It defaults
	// to os.Exit.
	Exit = os.Exit

	// CheckForBugs determines whether the framework bails out when
	// test initialization found any bugs. It is enabled by default.
	CheckForBugs = true
)
    72  
// TestContextType contains test settings and global state. Due to
// historic reasons, it is a mixture of items managed by the test
// framework itself, cloud providers and individual tests.
// The goal is to move anything not required by the framework
// into the code which uses the settings.
//
// The recommendation for those settings is:
//   - They are stored in their own context structure or local
//     variables.
//   - The standard `flag` package is used to register them.
//     The flag name should follow the pattern <part1>.<part2>....<partn>
//     where the prefix is unlikely to conflict with other tests or
//     standard packages and each part is in lower camel case. For
//     example, test/e2e/storage/csi/context.go could define
//     storage.csi.numIterations.
//   - framework/config can be used to simplify the registration of
//     multiple options with a single function call:
//     var storageCSI struct {
//     NumIterations int `default:"1" usage:"number of iterations"`
//     }
//     var _ = config.AddOptions(&storageCSI, "storage.csi")
//   - The direct use of Viper in tests is possible, but discouraged
//     because it only works in test suites which use Viper (which is
//     not required) and the supported options cannot be
//     discovered by a test suite user.
//
// Test suite authors can use framework/viper to make all command line
// parameters also configurable via a configuration file.
type TestContextType struct {
	KubeConfig             string
	KubeContext            string
	KubeAPIContentType     string
	KubeletRootDir         string
	KubeletConfigDropinDir string
	CertDir                string
	Host                   string
	BearerToken            string `datapolicy:"token"`
	// TODO: Deprecating this over time... instead just use gobindata_util.go , see #23987.
	RepoRoot string
	// ListImages will list off all images that are used then quit
	ListImages bool

	// listTests and listLabels are bound to the -list-tests and
	// -list-labels flags by RegisterCommonFlags.
	listTests, listLabels bool

	// ListConformanceTests will list off all conformance tests that are available then quit
	ListConformanceTests bool

	// Provider identifies the infrastructure provider (gce, gke, aws)
	Provider string

	// Tooling is the tooling in use (e.g. kops, gke).  Provider is the cloud provider and might not uniquely identify the tooling.
	Tooling string

	// timeouts contains user-configurable timeouts for various operations.
	// Individual Framework instances also have such timeouts which may be
	// different from these here. To avoid confusion, this field is not
	// exported. Its values can be accessed through
	// NewTimeoutContext.
	timeouts TimeoutContext

	CloudConfig                 CloudConfig
	KubectlPath                 string
	OutputDir                   string
	ReportDir                   string
	ReportPrefix                string
	ReportCompleteGinkgo        bool
	ReportCompleteJUnit         bool
	Prefix                      string
	MinStartupPods              int
	EtcdUpgradeStorage          string
	EtcdUpgradeVersion          string
	GCEUpgradeScript            string
	ContainerRuntimeEndpoint    string
	ContainerRuntimeProcessName string
	ContainerRuntimePidFile     string
	// SystemdServices is a comma separated list of systemd services the test framework
	// will dump logs for.
	SystemdServices string
	// DumpSystemdJournal controls whether to dump the full systemd journal.
	DumpSystemdJournal       bool
	ImageServiceEndpoint     string
	MasterOSDistro           string
	NodeOSDistro             string
	NodeOSArch               string
	VerifyServiceAccount     bool
	DeleteNamespace          bool
	DeleteNamespaceOnFailure bool
	AllowedNotReadyNodes     int
	CleanStart               bool
	// If set to 'true' or 'all' framework will start a goroutine monitoring resource usage of system add-ons.
	// It will read the data every 30 seconds from all Nodes and print summary during afterEach. If set to 'master'
	// only master Node will be monitored.
	GatherKubeSystemResourceUsageData string
	GatherLogsSizes                   bool
	GatherMetricsAfterTest            string
	GatherSuiteMetricsAfterTest       bool
	MaxNodesToGather                  int
	// If set to 'true' framework will gather ClusterAutoscaler metrics when gathering them for other components.
	IncludeClusterAutoscalerMetrics bool
	// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
	OutputPrintType string
	// CreateTestingNS is responsible for creating namespace used for executing e2e tests.
	// It accepts namespace base name, which will be prepended with e2e prefix, kube client
	// and labels to be applied to a namespace.
	CreateTestingNS CreateTestingNSFn
	// If set to true test will dump data about the namespace in which test was running.
	DumpLogsOnFailure bool
	// Disables dumping cluster log from master and nodes after all tests.
	DisableLogDump bool
	// Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty.
	LogexporterGCSPath string
	// Node e2e specific test context
	NodeTestContextType

	// The DNS Domain of the cluster.
	ClusterDNSDomain string

	// The configuration of NodeKiller.
	NodeKiller NodeKillerConfig

	// The Default IP Family of the cluster ("ipv4" or "ipv6")
	IPFamily string

	// NonblockingTaints is the comma-delimited string given by the user to specify taints which should not stop the test framework from running tests.
	NonblockingTaints string

	// ProgressReportURL is the URL which progress updates will be posted to as tests complete. If empty, no updates are sent.
	ProgressReportURL string

	// SriovdpConfigMapFile is the path to the ConfigMap to configure the SRIOV device plugin on this host.
	SriovdpConfigMapFile string

	// SpecSummaryOutput is the file to write ginkgo.SpecSummary objects to as tests complete. Useful for debugging and test introspection.
	SpecSummaryOutput string

	// DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test.
	DockerConfigFile string

	// E2EDockerConfigFile is a docker credentials configuration file which contains an authorization token that can be used to pull images from certain private registries provided by the users.
	// For more details refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub
	E2EDockerConfigFile string

	// KubeTestRepoList is a yaml file used for overriding registries for test images.
	KubeTestRepoList string

	// SnapshotControllerPodName is the name used for identifying the snapshot controller pod.
	SnapshotControllerPodName string

	// SnapshotControllerHTTPPort is the port used for communicating with the snapshot controller HTTP endpoint.
	SnapshotControllerHTTPPort int

	// RequireDevices makes it mandatory that the environment on which tests are run exposes 1+ devices through device plugins.
	// With this enabled, e2e tests requiring devices for their operation can assume that if devices aren't reported, the test can fail.
	RequireDevices bool

	// Enable volume drivers which are disabled by default. See test/e2e/storage/in_tree_volumes.go for details.
	EnabledVolumeDrivers []string
}
   231  
// NodeKillerConfig describes configuration of NodeKiller -- a utility to
// simulate node failures.
//
// TODO: move this and the corresponding command line flags into
// test/e2e/framework/node.
type NodeKillerConfig struct {
	// Enabled determines whether NodeKiller should do anything at all.
	// All other options below are ignored if Enabled = false.
	Enabled bool
	// FailureRatio is a percentage of all nodes that could fail simultaneously.
	FailureRatio float64
	// Interval is time between node failures.
	Interval time.Duration
	// JitterFactor is factor used to jitter node failures.
	// Node will be killed between [Interval, Interval + (1.0 + JitterFactor)].
	// NOTE(review): the upper bound above adds a bare factor to a duration;
	// presumably Interval * (1.0 + JitterFactor) is meant -- confirm against
	// the NodeKiller implementation.
	JitterFactor float64
	// SimulatedDowntime is the duration between a node being killed and recreated.
	SimulatedDowntime time.Duration
	// NodeKillerStopCtx is a context that is used to notify NodeKiller to stop killing nodes.
	NodeKillerStopCtx context.Context
	// NodeKillerStop is the cancel function for NodeKillerStopCtx.
	NodeKillerStop func()
}
   255  
// NodeTestContextType is part of TestContextType (where it is embedded);
// it is shared by all node e2e tests.
type NodeTestContextType struct {
	// NodeE2E indicates whether it is running node e2e.
	NodeE2E bool
	// NodeName is the name of the node to run tests on.
	NodeName string
	// NodeConformance indicates whether the test is running in node conformance mode.
	NodeConformance bool
	// PrepullImages indicates whether node e2e framework should prepull images.
	PrepullImages bool
	// ImageDescription is the description of the image on which the test is running.
	ImageDescription string
	// RuntimeConfig is a map of API server runtime configuration values.
	RuntimeConfig map[string]string
	// SystemSpecName is the name of the system spec (e.g., gke) that's used in
	// the node e2e test. If empty, the default one (system.DefaultSpec) is
	// used. The system specs are in test/e2e_node/system/specs/.
	SystemSpecName string
	// RestartKubelet restarts Kubelet unit when the process is killed.
	RestartKubelet bool
	// ExtraEnvs is a map of environment names to values.
	ExtraEnvs map[string]string
	// StandaloneMode indicates whether the test is running kubelet in a standalone mode.
	StandaloneMode bool
}
   281  
// CloudConfig holds the cloud configuration for e2e test suites.
//
// All fields except Provider are bound to command line flags by
// RegisterClusterFlags.
type CloudConfig struct {
	APIEndpoint       string
	ProjectID         string
	Zone              string   // for multizone tests, arbitrarily chosen zone
	Zones             []string // for multizone tests, use this set of zones instead of querying the cloud provider. Must include Zone.
	Region            string
	MultiZone         bool
	MultiMaster       bool
	Cluster           string
	MasterName        string
	NodeInstanceGroup string // comma-delimited list of groups' names
	NumNodes          int
	ClusterIPRange    string
	ClusterTag        string
	Network           string
	ConfigFile        string // cloud config file; per its flag help only required for azure or vsphere
	NodeTag           string
	MasterTag         string

	// Provider is the provider implementation. It is not set from any
	// flag in this file.
	Provider ProviderInterface
}
   304  
// TestContext should be used by all tests to access common context data.
//
// Its timeouts start out as defaultTimeouts; RegisterClusterFlags uses
// those values as the defaults of the timeout flags it registers.
var TestContext = TestContextType{
	timeouts: defaultTimeouts,
}
   309  
   310  // StringArrayValue is used with flag.Var for a comma-separated list of strings placed into a string array.
   311  type stringArrayValue struct {
   312  	stringArray *[]string
   313  }
   314  
   315  func (v stringArrayValue) String() string {
   316  	if v.stringArray != nil {
   317  		return strings.Join(*v.stringArray, ",")
   318  	}
   319  	return ""
   320  }
   321  
   322  func (v stringArrayValue) Set(s string) error {
   323  	if len(s) == 0 {
   324  		*v.stringArray = []string{}
   325  	} else {
   326  		*v.stringArray = strings.Split(s, ",")
   327  	}
   328  	return nil
   329  }
   330  
   331  // ClusterIsIPv6 returns true if the cluster is IPv6
   332  func (tc TestContextType) ClusterIsIPv6() bool {
   333  	return tc.IPFamily == "ipv6"
   334  }
   335  
   336  // RegisterCommonFlags registers flags common to all e2e test suites.
   337  // The flag set can be flag.CommandLine (if desired) or a custom
   338  // flag set that then gets passed to viperconfig.ViperizeFlags.
   339  //
   340  // The other Register*Flags methods below can be used to add more
   341  // test-specific flags. However, those settings then get added
   342  // regardless whether the test is actually in the test suite.
   343  //
   344  // For tests that have been converted to registering their
   345  // options themselves, copy flags from test/e2e/framework/config
   346  // as shown in HandleFlags.
   347  func RegisterCommonFlags(flags *flag.FlagSet) {
   348  	// The default is too low for objects like pods, even when using YAML. We double the default.
   349  	flags.IntVar(&gomegaformat.MaxLength, "gomega-max-length", 8000, "Sets the maximum size for the gomega formatter (= gomega.MaxLength). Use 0 to disable truncation.")
   350  
   351  	flags.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all' framework will be monitoring resource usage of system all add-ons in (some) e2e tests, if set to 'master' framework will be monitoring master node only, if set to 'none' of 'false' monitoring will be turned off.")
   352  	flags.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
   353  	flags.IntVar(&TestContext.MaxNodesToGather, "max-nodes-to-gather-from", 20, "The maximum number of nodes to gather extended info from on test failure.")
   354  	flags.StringVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", "false", "If set to 'true' framework will gather metrics from all components after each test. If set to 'master' only master component metrics would be gathered.")
   355  	flags.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after the whole test suite completes.")
   356  	flags.BoolVar(&TestContext.IncludeClusterAutoscalerMetrics, "include-cluster-autoscaler", false, "If set to true, framework will include Cluster Autoscaler when gathering metrics.")
   357  	flags.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
   358  	flags.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")
   359  	flags.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after test run.")
   360  	flags.StringVar(&TestContext.LogexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty.")
   361  	flags.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
   362  	flags.BoolVar(&TestContext.DeleteNamespaceOnFailure, "delete-namespace-on-failure", true, "If true, framework will delete test namespace on failure. Used only during test debugging.")
   363  	flags.IntVar(&TestContext.AllowedNotReadyNodes, "allowed-not-ready-nodes", 0, "If greater than zero, framework will allow for that many non-ready nodes when checking for all ready nodes. If -1, no waiting will be performed for ready nodes or daemonset pods.")
   364  
   365  	flags.StringVar(&TestContext.Host, "host", "", fmt.Sprintf("The host, or apiserver, to connect to. Will default to %s if this argument and --kubeconfig are not set.", defaultHost))
   366  	flags.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
   367  	flags.StringVar(&TestContext.ReportDir, "report-dir", "", "Path to the directory where the simplified JUnit XML reports and other tests results should be saved. Default is empty, which doesn't generate these reports.  If ginkgo's -junit-report parameter is used, that parameter instead of -report-dir determines the location of a single JUnit report.")
   368  	flags.BoolVar(&TestContext.ReportCompleteGinkgo, "report-complete-ginkgo", false, "Enables writing a complete test report as Ginkgo JSON to <report dir>/ginkgo/report.json. Ignored if --report-dir is not set.")
   369  	flags.BoolVar(&TestContext.ReportCompleteJUnit, "report-complete-junit", false, "Enables writing a complete test report as JUnit XML to <report dir>/ginkgo/report.json. Ignored if --report-dir is not set.")
   370  	flags.StringVar(&TestContext.ContainerRuntimeEndpoint, "container-runtime-endpoint", "unix:///run/containerd/containerd.sock", "The container runtime endpoint of cluster VM instances.")
   371  	flags.StringVar(&TestContext.ContainerRuntimeProcessName, "container-runtime-process-name", "containerd", "The name of the container runtime process.")
   372  	flags.StringVar(&TestContext.ContainerRuntimePidFile, "container-runtime-pid-file", "/run/containerd/containerd.pid", "The pid file of the container runtime.")
   373  	flags.StringVar(&TestContext.SystemdServices, "systemd-services", "containerd*", "The comma separated list of systemd services the framework will dump logs for.")
   374  	flags.BoolVar(&TestContext.DumpSystemdJournal, "dump-systemd-journal", false, "Whether to dump the full systemd journal.")
   375  	flags.StringVar(&TestContext.ImageServiceEndpoint, "image-service-endpoint", "", "The image service endpoint of cluster VM instances.")
   376  	flags.StringVar(&TestContext.NonblockingTaints, "non-blocking-taints", `node-role.kubernetes.io/control-plane`, "Nodes with taints in this comma-delimited list will not block the test framework from starting tests.")
   377  
   378  	flags.BoolVar(&TestContext.ListImages, "list-images", false, "If true, will show list of images used for running tests.")
   379  	flags.BoolVar(&TestContext.listLabels, "list-labels", false, "If true, will show the list of labels that can be used to select tests via -ginkgo.label-filter.")
   380  	flags.BoolVar(&TestContext.listTests, "list-tests", false, "If true, will show the full names of all tests (aka specs) that can be used to select test via -ginkgo.focus/skip.")
   381  	flags.StringVar(&TestContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.")
   382  
   383  	flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.")
   384  	flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.")
   385  	flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A docker credential file which contains authorization token that is used to perform image pull tests from an authenticated registry. For more details regarding the content of the file refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub")
   386  
   387  	flags.StringVar(&TestContext.E2EDockerConfigFile, "e2e-docker-config-file", "", "A docker credentials configuration file used which contains authorization token that can be used to pull images from certain private registries provided by the users. For more details refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub")
   388  	flags.StringVar(&TestContext.KubeTestRepoList, "kube-test-repo-list", "", "A yaml file used for overriding registries for test images. Alternatively, the KUBE_TEST_REPO_LIST env variable can be set.")
   389  
   390  	flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.")
   391  	flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.")
   392  
   393  	flags.Var(&stringArrayValue{&TestContext.EnabledVolumeDrivers}, "enabled-volume-drivers", "Comma-separated list of in-tree volume drivers to enable for testing. This is only needed for in-tree drivers disabled by default. An example is gcepd; see test/e2e/storage/in_tree_volumes.go for full details.")
   394  }
   395  
   396  func CreateGinkgoConfig() (types.SuiteConfig, types.ReporterConfig) {
   397  	// fetch the current config
   398  	suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration()
   399  	// Randomize specs as well as suites
   400  	suiteConfig.RandomizeAllSpecs = true
   401  	// Disable skipped tests unless they are explicitly requested.
   402  	if len(suiteConfig.FocusStrings) == 0 && len(suiteConfig.SkipStrings) == 0 {
   403  		suiteConfig.SkipStrings = []string{`\[Flaky\]|\[Feature:.+\]`}
   404  	}
   405  	return suiteConfig, reporterConfig
   406  }
   407  
   408  // RegisterClusterFlags registers flags specific to the cluster e2e test suite.
   409  func RegisterClusterFlags(flags *flag.FlagSet) {
   410  	flags.BoolVar(&TestContext.VerifyServiceAccount, "e2e-verify-service-account", true, "If true tests will verify the service account before running.")
   411  	flags.StringVar(&TestContext.KubeConfig, clientcmd.RecommendedConfigPathFlag, os.Getenv(clientcmd.RecommendedConfigPathEnvVar), "Path to kubeconfig containing embedded authinfo.")
   412  	flags.StringVar(&TestContext.KubeContext, clientcmd.FlagContext, "", "kubeconfig context to use/override. If unset, will use value from 'current-context'")
   413  	flags.StringVar(&TestContext.KubeAPIContentType, "kube-api-content-type", "application/vnd.kubernetes.protobuf", "ContentType used to communicate with apiserver")
   414  
   415  	flags.StringVar(&TestContext.KubeletRootDir, "kubelet-root-dir", "/var/lib/kubelet", "The data directory of kubelet. Some tests (for example, CSI storage tests) deploy DaemonSets which need to know this value and cannot query it. Such tests only work in clusters where the data directory is the same on all nodes.")
   416  	flags.StringVar(&TestContext.KubeletRootDir, "volume-dir", "/var/lib/kubelet", "An alias for --kubelet-root-dir, kept for backwards compatibility.")
   417  	flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.")
   418  	flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.")
   419  	// NOTE: Node E2E tests have this flag defined as well, but true by default.
   420  	// If this becomes true as well, they should be refactored into RegisterCommonFlags.
   421  	flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.")
   422  	flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)")
   423  	flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)")
   424  	flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
   425  	flags.StringVar(&TestContext.Prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.")
   426  	flags.StringVar(&TestContext.MasterOSDistro, "master-os-distro", "debian", "The OS distribution of cluster master (debian, ubuntu, gci, coreos, or custom).")
   427  	flags.StringVar(&TestContext.NodeOSDistro, "node-os-distro", "debian", "The OS distribution of cluster VM instances (debian, ubuntu, gci, coreos, windows, or custom), which determines how specific tests are implemented.")
   428  	flags.StringVar(&TestContext.NodeOSArch, "node-os-arch", "amd64", "The OS architecture of cluster VM instances (amd64, arm64, or custom).")
   429  	flags.StringVar(&TestContext.ClusterDNSDomain, "dns-domain", "cluster.local", "The DNS Domain of the cluster.")
   430  
   431  	// TODO: Flags per provider?  Rename gce-project/gce-zone?
   432  	cloudConfig := &TestContext.CloudConfig
   433  	flags.StringVar(&cloudConfig.MasterName, "kube-master", "", "Name of the kubernetes master. Only required if provider is gce or gke")
   434  	flags.StringVar(&cloudConfig.APIEndpoint, "gce-api-endpoint", "", "The GCE APIEndpoint being used, if applicable")
   435  	flags.StringVar(&cloudConfig.ProjectID, "gce-project", "", "The GCE project being used, if applicable")
   436  	flags.StringVar(&cloudConfig.Zone, "gce-zone", "", "GCE zone being used, if applicable")
   437  	flags.Var(cliflag.NewStringSlice(&cloudConfig.Zones), "gce-zones", "The set of zones to use in a multi-zone test instead of querying the cloud provider.")
   438  	flags.StringVar(&cloudConfig.Region, "gce-region", "", "GCE region being used, if applicable")
   439  	flags.BoolVar(&cloudConfig.MultiZone, "gce-multizone", false, "If true, start GCE cloud provider with multizone support.")
   440  	flags.BoolVar(&cloudConfig.MultiMaster, "gce-multimaster", false, "If true, the underlying GCE/GKE cluster is assumed to be multi-master.")
   441  	flags.StringVar(&cloudConfig.Cluster, "gke-cluster", "", "GKE name of cluster being used, if applicable")
   442  	flags.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws. If there is more than one group: comma separated list of groups.")
   443  	flags.StringVar(&cloudConfig.Network, "network", "e2e", "The cloud provider network for this e2e cluster.")
   444  	flags.IntVar(&cloudConfig.NumNodes, "num-nodes", DefaultNumNodes, fmt.Sprintf("Number of nodes in the cluster. If the default value of '%q' is used the number of schedulable nodes is auto-detected.", DefaultNumNodes))
   445  	flags.StringVar(&cloudConfig.ClusterIPRange, "cluster-ip-range", "10.64.0.0/14", "A CIDR notation IP range from which to assign IPs in the cluster.")
   446  	flags.StringVar(&cloudConfig.NodeTag, "node-tag", "", "Network tags used on node instances. Valid only for gce, gke")
   447  	flags.StringVar(&cloudConfig.MasterTag, "master-tag", "", "Network tags used on master instances. Valid only for gce, gke")
   448  
   449  	flags.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources.  Only required if provider is aws.")
   450  	flags.StringVar(&cloudConfig.ConfigFile, "cloud-config-file", "", "Cloud config file.  Only required if provider is azure or vsphere.")
   451  	flags.IntVar(&TestContext.MinStartupPods, "minStartupPods", 0, "The number of pods which we need to see in 'Running' state with a 'Ready' condition of true, before we try running tests. This is useful in any cluster which needs some base pod-based services running before it can be used. If set to -1, no pods are checked and tests run straight away.")
   452  	flags.DurationVar(&TestContext.timeouts.SystemPodsStartup, "system-pods-startup-timeout", TestContext.timeouts.SystemPodsStartup, "Timeout for waiting for all system pods to be running before starting tests.")
   453  	flags.DurationVar(&TestContext.timeouts.NodeSchedulable, "node-schedulable-timeout", TestContext.timeouts.NodeSchedulable, "Timeout for waiting for all nodes to be schedulable.")
   454  	flags.DurationVar(&TestContext.timeouts.SystemDaemonsetStartup, "system-daemonsets-startup-timeout", TestContext.timeouts.SystemDaemonsetStartup, "Timeout for waiting for all system daemonsets to be ready.")
   455  	flags.StringVar(&TestContext.EtcdUpgradeStorage, "etcd-upgrade-storage", "", "The storage version to upgrade to (either 'etcdv2' or 'etcdv3') if doing an etcd upgrade test.")
   456  	flags.StringVar(&TestContext.EtcdUpgradeVersion, "etcd-upgrade-version", "", "The etcd binary version to upgrade to (e.g., '3.0.14', '2.3.7') if doing an etcd upgrade test.")
   457  	flags.StringVar(&TestContext.GCEUpgradeScript, "gce-upgrade-script", "", "Script to use to upgrade a GCE cluster.")
   458  	flags.BoolVar(&TestContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to Cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
   459  
   460  	nodeKiller := &TestContext.NodeKiller
   461  	flags.BoolVar(&nodeKiller.Enabled, "node-killer", false, "Whether NodeKiller should kill any nodes.")
   462  	flags.Float64Var(&nodeKiller.FailureRatio, "node-killer-failure-ratio", 0.01, "Percentage of nodes to be killed")
   463  	flags.DurationVar(&nodeKiller.Interval, "node-killer-interval", 1*time.Minute, "Time between node failures.")
   464  	flags.Float64Var(&nodeKiller.JitterFactor, "node-killer-jitter-factor", 60, "Factor used to jitter node failures.")
   465  	flags.DurationVar(&nodeKiller.SimulatedDowntime, "node-killer-simulated-downtime", 10*time.Minute, "A delay between node death and recreation")
   466  }
   467  
   468  // GenerateSecureToken returns a string of length tokenLen, consisting
   469  // of random bytes encoded as base64 for use as a Bearer Token during
   470  // communication with an APIServer
   471  func GenerateSecureToken(tokenLen int) (string, error) {
   472  	// Number of bytes to be tokenLen when base64 encoded.
   473  	tokenSize := math.Ceil(float64(tokenLen) * 6 / 8)
   474  	rawToken := make([]byte, int(tokenSize))
   475  	if _, err := rand.Read(rawToken); err != nil {
   476  		return "", err
   477  	}
   478  	encoded := base64.RawURLEncoding.EncodeToString(rawToken)
   479  	token := encoded[:tokenLen]
   480  	return token, nil
   481  }
   482  
   483  // AfterReadingAllFlags makes changes to the context after all flags
   484  // have been read and prepares the process for a test run.
   485  func AfterReadingAllFlags(t *TestContextType) {
   486  	// Reconfigure klog so that output goes to the GinkgoWriter instead
   487  	// of stderr. The advantage is that it then gets interleaved properly
   488  	// with output that goes to GinkgoWriter (By, Logf).
   489  
   490  	// These flags are not exposed via the normal command line flag set,
   491  	// therefore we have to use our own private one here.
   492  	if t.KubeTestRepoList != "" {
   493  		image.Init(t.KubeTestRepoList)
   494  	}
   495  	var fs flag.FlagSet
   496  	klog.InitFlags(&fs)
   497  	fs.Set("logtostderr", "false")
   498  	fs.Set("alsologtostderr", "false")
   499  	fs.Set("one_output", "true")
   500  	fs.Set("stderrthreshold", "10" /* higher than any of the severities -> none pass the threshold */)
   501  	klog.SetOutput(ginkgo.GinkgoWriter)
   502  
   503  	if t.ListImages {
   504  		for _, v := range image.GetImageConfigs() {
   505  			fmt.Println(v.GetE2EImage())
   506  		}
   507  		Exit(0)
   508  	}
   509  
   510  	// Reconfigure gomega defaults. The poll interval should be suitable
   511  	// for most tests. The timeouts are more subjective and tests may want
   512  	// to override them, but these defaults are still better for E2E than the
   513  	// ones from Gomega (1s timeout, 10ms interval).
   514  	gomega.SetDefaultEventuallyPollingInterval(t.timeouts.Poll)
   515  	gomega.SetDefaultConsistentlyPollingInterval(t.timeouts.Poll)
   516  	gomega.SetDefaultEventuallyTimeout(t.timeouts.PodStart)
   517  	gomega.SetDefaultConsistentlyDuration(t.timeouts.PodStartShort)
   518  
   519  	// ginkgo.PreviewSpecs will expand all nodes and thus may find new bugs.
   520  	report := ginkgo.PreviewSpecs("Kubernetes e2e test statistics")
   521  	validateSpecs(report.SpecReports)
   522  	if err := FormatBugs(); CheckForBugs && err != nil {
   523  		// Refuse to do anything if the E2E suite is buggy.
   524  		fmt.Fprint(Output, "ERROR: E2E suite initialization was faulty, these errors must be fixed:")
   525  		fmt.Fprint(Output, "\n"+err.Error())
   526  		Exit(1)
   527  	}
   528  	if t.listLabels || t.listTests {
   529  		listTestInformation(report)
   530  		Exit(0)
   531  	}
   532  
   533  	// Only set a default host if one won't be supplied via kubeconfig
   534  	if len(t.Host) == 0 && len(t.KubeConfig) == 0 {
   535  		// Check if we can use the in-cluster config
   536  		if clusterConfig, err := restclient.InClusterConfig(); err == nil {
   537  			if tempFile, err := os.CreateTemp(os.TempDir(), "kubeconfig-"); err == nil {
   538  				kubeConfig := kubeconfig.CreateKubeConfig(clusterConfig)
   539  				clientcmd.WriteToFile(*kubeConfig, tempFile.Name())
   540  				t.KubeConfig = tempFile.Name()
   541  				klog.V(4).Infof("Using a temporary kubeconfig file from in-cluster config : %s", tempFile.Name())
   542  			}
   543  		}
   544  		if len(t.KubeConfig) == 0 {
   545  			klog.Warningf("Unable to find in-cluster config, using default host : %s", defaultHost)
   546  			t.Host = defaultHost
   547  		}
   548  	}
   549  	if len(t.BearerToken) == 0 {
   550  		var err error
   551  		t.BearerToken, err = GenerateSecureToken(16)
   552  		if err != nil {
   553  			klog.Fatalf("Failed to generate bearer token: %v", err)
   554  		}
   555  	}
   556  
   557  	// Allow 1% of nodes to be unready (statistically) - relevant for large clusters.
   558  	if t.AllowedNotReadyNodes == 0 {
   559  		t.AllowedNotReadyNodes = t.CloudConfig.NumNodes / 100
   560  	}
   561  
   562  	klog.V(4).Infof("Tolerating taints %q when considering if nodes are ready", TestContext.NonblockingTaints)
   563  
   564  	// Make sure that all test runs have a valid TestContext.CloudConfig.Provider.
   565  	// TODO: whether and how long this code is needed is getting discussed
   566  	// in https://github.com/kubernetes/kubernetes/issues/70194.
   567  	if TestContext.Provider == "" {
   568  		// Some users of the e2e.test binary pass --provider=.
   569  		// We need to support that, changing it would break those usages.
   570  		Logf("The --provider flag is not set. Continuing as if --provider=skeleton had been used.")
   571  		TestContext.Provider = "skeleton"
   572  	}
   573  
   574  	var err error
   575  	TestContext.CloudConfig.Provider, err = SetupProviderConfig(TestContext.Provider)
   576  	if err != nil {
   577  		if os.IsNotExist(errors.Unwrap(err)) {
   578  			// Provide a more helpful error message when the provider is unknown.
   579  			var providers []string
   580  			for _, name := range GetProviders() {
   581  				// The empty string is accepted, but looks odd in the output below unless we quote it.
   582  				if name == "" {
   583  					name = `""`
   584  				}
   585  				providers = append(providers, name)
   586  			}
   587  			sort.Strings(providers)
   588  			klog.Errorf("Unknown provider %q. The following providers are known: %v", TestContext.Provider, strings.Join(providers, " "))
   589  		} else {
   590  			klog.Errorf("Failed to setup provider config for %q: %v", TestContext.Provider, err)
   591  		}
   592  		Exit(1)
   593  	}
   594  
   595  	if TestContext.ReportDir != "" {
   596  		// Create the directory before running the suite. If
   597  		// --report-dir is not unusable, we should report
   598  		// that as soon as possible. This will be done by each worker
   599  		// in parallel, so we will get "exists" error in most of them.
   600  		if err := os.MkdirAll(TestContext.ReportDir, 0777); err != nil && !os.IsExist(err) {
   601  			klog.Errorf("Create report dir: %v", err)
   602  			Exit(1)
   603  		}
   604  		ginkgoDir := path.Join(TestContext.ReportDir, "ginkgo")
   605  		if TestContext.ReportCompleteGinkgo || TestContext.ReportCompleteJUnit {
   606  			if err := os.MkdirAll(ginkgoDir, 0777); err != nil && !os.IsExist(err) {
   607  				klog.Errorf("Create <report-dir>/ginkgo: %v", err)
   608  				Exit(1)
   609  			}
   610  		}
   611  
   612  		if TestContext.ReportCompleteGinkgo {
   613  			ginkgo.ReportAfterSuite("Ginkgo JSON report", func(report ginkgo.Report) {
   614  				ExpectNoError(reporters.GenerateJSONReport(report, path.Join(ginkgoDir, "report.json")))
   615  			})
   616  			ginkgo.ReportAfterSuite("JUnit XML report", func(report ginkgo.Report) {
   617  				ExpectNoError(reporters.GenerateJUnitReport(report, path.Join(ginkgoDir, "report.xml")))
   618  			})
   619  		}
   620  
   621  		ginkgo.ReportAfterSuite("Kubernetes e2e JUnit report", func(report ginkgo.Report) {
   622  			// With Ginkgo v1, we used to write one file per
   623  			// parallel node. Now Ginkgo v2 automatically merges
   624  			// all results into a report for us. The 01 suffix is
   625  			// kept in case that users expect files to be called
   626  			// "junit_<prefix><number>.xml".
   627  			junitReport := path.Join(TestContext.ReportDir, "junit_"+TestContext.ReportPrefix+"01.xml")
   628  
   629  			// writeJUnitReport generates a JUnit file in the e2e
   630  			// report directory that is shorter than the one
   631  			// normally written by `ginkgo --junit-report`. This is
   632  			// needed because the full report can become too large
   633  			// for tools like Spyglass
   634  			// (https://github.com/kubernetes/kubernetes/issues/111510).
   635  			ExpectNoError(junit.WriteJUnitReport(report, junitReport))
   636  		})
   637  	}
   638  }
   639  
   640  func listTestInformation(report ginkgo.Report) {
   641  	indent := strings.Repeat(" ", 4)
   642  
   643  	if TestContext.listLabels {
   644  		labels := sets.New[string]()
   645  		for _, spec := range report.SpecReports {
   646  			if spec.LeafNodeType == types.NodeTypeIt {
   647  				labels.Insert(spec.Labels()...)
   648  			}
   649  		}
   650  		fmt.Fprintf(Output, "The following labels can be used with 'gingko run --label-filter':\n%s%s\n\n", indent, strings.Join(sets.List(labels), "\n"+indent))
   651  	}
   652  	if TestContext.listTests {
   653  		leafs := make([][]string, 0, len(report.SpecReports))
   654  		wd, _ := os.Getwd()
   655  		for _, spec := range report.SpecReports {
   656  			if spec.LeafNodeType == types.NodeTypeIt {
   657  				leafs = append(leafs, []string{fmt.Sprintf("%s:%d: ", relativePath(wd, spec.LeafNodeLocation.FileName), spec.LeafNodeLocation.LineNumber), spec.FullText()})
   658  			}
   659  		}
   660  		// Sort by test name, not the source code location, because the test
   661  		// name is more stable across code refactoring.
   662  		sort.Slice(leafs, func(i, j int) bool {
   663  			return leafs[i][1] < leafs[j][1]
   664  		})
   665  		fmt.Fprint(Output, "The following spec names can be used with 'ginkgo run --focus/skip':\n")
   666  		for _, leaf := range leafs {
   667  			fmt.Fprintf(Output, "%s%s%s\n", indent, leaf[0], leaf[1])
   668  		}
   669  		fmt.Fprint(Output, "\n")
   670  	}
   671  }
   672  
   673  func relativePath(wd, path string) string {
   674  	if wd == "" {
   675  		return path
   676  	}
   677  	relpath, err := filepath.Rel(wd, path)
   678  	if err != nil {
   679  		return path
   680  	}
   681  	return relpath
   682  }