k8s.io/kubernetes@v1.29.3/test/e2e/framework/test_context.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package framework 18 19 import ( 20 "context" 21 "crypto/rand" 22 "encoding/base64" 23 "errors" 24 "flag" 25 "fmt" 26 "io" 27 "math" 28 "os" 29 "path" 30 "path/filepath" 31 "sort" 32 "strings" 33 "time" 34 35 "github.com/onsi/ginkgo/v2" 36 "github.com/onsi/ginkgo/v2/reporters" 37 "github.com/onsi/ginkgo/v2/types" 38 "github.com/onsi/gomega" 39 gomegaformat "github.com/onsi/gomega/format" 40 41 "k8s.io/apimachinery/pkg/util/sets" 42 restclient "k8s.io/client-go/rest" 43 "k8s.io/client-go/tools/clientcmd" 44 cliflag "k8s.io/component-base/cli/flag" 45 "k8s.io/klog/v2" 46 47 "k8s.io/kubernetes/test/e2e/framework/internal/junit" 48 "k8s.io/kubernetes/test/utils/image" 49 "k8s.io/kubernetes/test/utils/kubeconfig" 50 ) 51 52 const ( 53 defaultHost = "https://127.0.0.1:6443" 54 55 // DefaultNumNodes is the number of nodes. If not specified, then number of nodes is auto-detected 56 DefaultNumNodes = -1 57 ) 58 59 var ( 60 // Output is used for output when not running tests, for example in -list-tests. 61 // Test output should go to ginkgo.GinkgoWriter. 62 Output io.Writer = os.Stdout 63 64 // Exit is called when the framework detects fatal errors or when 65 // it is done with the execution of e.g. -list-tests. 66 Exit = os.Exit 67 68 // CheckForBugs determines whether the framework bails out when 69 // test initialization found any bugs. 70 CheckForBugs = true 71 ) 72 73 // TestContextType contains test settings and global state. Due to 74 // historic reasons, it is a mixture of items managed by the test 75 // framework itself, cloud providers and individual tests. 76 // The goal is to move anything not required by the framework 77 // into the code which uses the settings. 78 // 79 // The recommendation for those settings is: 80 // - They are stored in their own context structure or local 81 // variables. 82 // - The standard `flag` package is used to register them. 83 // The flag name should follow the pattern <part1>.<part2>....<partn> 84 // where the prefix is unlikely to conflict with other tests or 85 // standard packages and each part is in lower camel case. For 86 // example, test/e2e/storage/csi/context.go could define 87 // storage.csi.numIterations. 88 // - framework/config can be used to simplify the registration of 89 // multiple options with a single function call: 90 // var storageCSI { 91 // NumIterations `default:"1" usage:"number of iterations"` 92 // } 93 // _ config.AddOptions(&storageCSI, "storage.csi") 94 // - The direct use Viper in tests is possible, but discouraged because 95 // it only works in test suites which use Viper (which is not 96 // required) and the supported options cannot be 97 // discovered by a test suite user. 98 // 99 // Test suite authors can use framework/viper to make all command line 100 // parameters also configurable via a configuration file. 101 type TestContextType struct { 102 KubeConfig string 103 KubeContext string 104 KubeAPIContentType string 105 KubeletRootDir string 106 KubeletConfigDropinDir string 107 CertDir string 108 Host string 109 BearerToken string `datapolicy:"token"` 110 // TODO: Deprecating this over time... instead just use gobindata_util.go , see #23987. 111 RepoRoot string 112 // ListImages will list off all images that are used then quit 113 ListImages bool 114 115 listTests, listLabels bool 116 117 // ListConformanceTests will list off all conformance tests that are available then quit 118 ListConformanceTests bool 119 120 // Provider identifies the infrastructure provider (gce, gke, aws) 121 Provider string 122 123 // Tooling is the tooling in use (e.g. kops, gke). Provider is the cloud provider and might not uniquely identify the tooling. 124 Tooling string 125 126 // timeouts contains user-configurable timeouts for various operations. 127 // Individual Framework instance also have such timeouts which may be 128 // different from these here. To avoid confusion, this field is not 129 // exported. Its values can be accessed through 130 // NewTimeoutContext. 131 timeouts TimeoutContext 132 133 CloudConfig CloudConfig 134 KubectlPath string 135 OutputDir string 136 ReportDir string 137 ReportPrefix string 138 ReportCompleteGinkgo bool 139 ReportCompleteJUnit bool 140 Prefix string 141 MinStartupPods int 142 EtcdUpgradeStorage string 143 EtcdUpgradeVersion string 144 GCEUpgradeScript string 145 ContainerRuntimeEndpoint string 146 ContainerRuntimeProcessName string 147 ContainerRuntimePidFile string 148 // SystemdServices are comma separated list of systemd services the test framework 149 // will dump logs for. 150 SystemdServices string 151 // DumpSystemdJournal controls whether to dump the full systemd journal. 152 DumpSystemdJournal bool 153 ImageServiceEndpoint string 154 MasterOSDistro string 155 NodeOSDistro string 156 NodeOSArch string 157 VerifyServiceAccount bool 158 DeleteNamespace bool 159 DeleteNamespaceOnFailure bool 160 AllowedNotReadyNodes int 161 CleanStart bool 162 // If set to 'true' or 'all' framework will start a goroutine monitoring resource usage of system add-ons. 163 // It will read the data every 30 seconds from all Nodes and print summary during afterEach. If set to 'master' 164 // only master Node will be monitored. 165 GatherKubeSystemResourceUsageData string 166 GatherLogsSizes bool 167 GatherMetricsAfterTest string 168 GatherSuiteMetricsAfterTest bool 169 MaxNodesToGather int 170 // If set to 'true' framework will gather ClusterAutoscaler metrics when gathering them for other components. 171 IncludeClusterAutoscalerMetrics bool 172 // Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list. 173 OutputPrintType string 174 // CreateTestingNS is responsible for creating namespace used for executing e2e tests. 175 // It accepts namespace base name, which will be prepended with e2e prefix, kube client 176 // and labels to be applied to a namespace. 177 CreateTestingNS CreateTestingNSFn 178 // If set to true test will dump data about the namespace in which test was running. 179 DumpLogsOnFailure bool 180 // Disables dumping cluster log from master and nodes after all tests. 181 DisableLogDump bool 182 // Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty. 183 LogexporterGCSPath string 184 // Node e2e specific test context 185 NodeTestContextType 186 187 // The DNS Domain of the cluster. 188 ClusterDNSDomain string 189 190 // The configuration of NodeKiller. 191 NodeKiller NodeKillerConfig 192 193 // The Default IP Family of the cluster ("ipv4" or "ipv6") 194 IPFamily string 195 196 // NonblockingTaints is the comma-delimeted string given by the user to specify taints which should not stop the test framework from running tests. 197 NonblockingTaints string 198 199 // ProgressReportURL is the URL which progress updates will be posted to as tests complete. If empty, no updates are sent. 200 ProgressReportURL string 201 202 // SriovdpConfigMapFile is the path to the ConfigMap to configure the SRIOV device plugin on this host. 203 SriovdpConfigMapFile string 204 205 // SpecSummaryOutput is the file to write ginkgo.SpecSummary objects to as tests complete. Useful for debugging and test introspection. 206 SpecSummaryOutput string 207 208 // DockerConfigFile is a file that contains credentials which can be used to pull images from certain private registries, needed for a test. 209 DockerConfigFile string 210 211 // E2EDockerConfigFile is a docker credentials configuration file used which contains authorization token that can be used to pull images from certain private registries provided by the users. 212 // For more details refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub 213 E2EDockerConfigFile string 214 215 // KubeTestRepoConfigFile is a yaml file used for overriding registries for test images. 216 KubeTestRepoList string 217 218 // SnapshotControllerPodName is the name used for identifying the snapshot controller pod. 219 SnapshotControllerPodName string 220 221 // SnapshotControllerHTTPPort the port used for communicating with the snapshot controller HTTP endpoint. 222 SnapshotControllerHTTPPort int 223 224 // RequireDevices makes mandatory on the environment on which tests are run 1+ devices exposed through device plugins. 225 // With this enabled The e2e tests requiring devices for their operation can assume that if devices aren't reported, the test can fail 226 RequireDevices bool 227 228 // Enable volume drivers which are disabled by default. See test/e2e/storage/in_tree_volumes.go for details. 229 EnabledVolumeDrivers []string 230 } 231 232 // NodeKillerConfig describes configuration of NodeKiller -- a utility to 233 // simulate node failures. 234 // 235 // TODO: move this and the corresponding command line flags into 236 // test/e2e/framework/node. 237 type NodeKillerConfig struct { 238 // Enabled determines whether NodeKill should do anything at all. 239 // All other options below are ignored if Enabled = false. 240 Enabled bool 241 // FailureRatio is a percentage of all nodes that could fail simultinously. 242 FailureRatio float64 243 // Interval is time between node failures. 244 Interval time.Duration 245 // JitterFactor is factor used to jitter node failures. 246 // Node will be killed between [Interval, Interval + (1.0 + JitterFactor)]. 247 JitterFactor float64 248 // SimulatedDowntime is a duration between node is killed and recreated. 249 SimulatedDowntime time.Duration 250 // NodeKillerStopCtx is a context that is used to notify NodeKiller to stop killing nodes. 251 NodeKillerStopCtx context.Context 252 // NodeKillerStop is the cancel function for NodeKillerStopCtx. 253 NodeKillerStop func() 254 } 255 256 // NodeTestContextType is part of TestContextType, it is shared by all node e2e test. 257 type NodeTestContextType struct { 258 // NodeE2E indicates whether it is running node e2e. 259 NodeE2E bool 260 // Name of the node to run tests on. 261 NodeName string 262 // NodeConformance indicates whether the test is running in node conformance mode. 263 NodeConformance bool 264 // PrepullImages indicates whether node e2e framework should prepull images. 265 PrepullImages bool 266 // ImageDescription is the description of the image on which the test is running. 267 ImageDescription string 268 // RuntimeConfig is a map of API server runtime configuration values. 269 RuntimeConfig map[string]string 270 // SystemSpecName is the name of the system spec (e.g., gke) that's used in 271 // the node e2e test. If empty, the default one (system.DefaultSpec) is 272 // used. The system specs are in test/e2e_node/system/specs/. 273 SystemSpecName string 274 // RestartKubelet restarts Kubelet unit when the process is killed. 275 RestartKubelet bool 276 // ExtraEnvs is a map of environment names to values. 277 ExtraEnvs map[string]string 278 // StandaloneMode indicates whether the test is running kubelet in a standalone mode. 279 StandaloneMode bool 280 } 281 282 // CloudConfig holds the cloud configuration for e2e test suites. 283 type CloudConfig struct { 284 APIEndpoint string 285 ProjectID string 286 Zone string // for multizone tests, arbitrarily chosen zone 287 Zones []string // for multizone tests, use this set of zones instead of querying the cloud provider. Must include Zone. 288 Region string 289 MultiZone bool 290 MultiMaster bool 291 Cluster string 292 MasterName string 293 NodeInstanceGroup string // comma-delimited list of groups' names 294 NumNodes int 295 ClusterIPRange string 296 ClusterTag string 297 Network string 298 ConfigFile string // for azure 299 NodeTag string 300 MasterTag string 301 302 Provider ProviderInterface 303 } 304 305 // TestContext should be used by all tests to access common context data. 306 var TestContext = TestContextType{ 307 timeouts: defaultTimeouts, 308 } 309 310 // StringArrayValue is used with flag.Var for a comma-separated list of strings placed into a string array. 311 type stringArrayValue struct { 312 stringArray *[]string 313 } 314 315 func (v stringArrayValue) String() string { 316 if v.stringArray != nil { 317 return strings.Join(*v.stringArray, ",") 318 } 319 return "" 320 } 321 322 func (v stringArrayValue) Set(s string) error { 323 if len(s) == 0 { 324 *v.stringArray = []string{} 325 } else { 326 *v.stringArray = strings.Split(s, ",") 327 } 328 return nil 329 } 330 331 // ClusterIsIPv6 returns true if the cluster is IPv6 332 func (tc TestContextType) ClusterIsIPv6() bool { 333 return tc.IPFamily == "ipv6" 334 } 335 336 // RegisterCommonFlags registers flags common to all e2e test suites. 337 // The flag set can be flag.CommandLine (if desired) or a custom 338 // flag set that then gets passed to viperconfig.ViperizeFlags. 339 // 340 // The other Register*Flags methods below can be used to add more 341 // test-specific flags. However, those settings then get added 342 // regardless whether the test is actually in the test suite. 343 // 344 // For tests that have been converted to registering their 345 // options themselves, copy flags from test/e2e/framework/config 346 // as shown in HandleFlags. 347 func RegisterCommonFlags(flags *flag.FlagSet) { 348 // The default is too low for objects like pods, even when using YAML. We double the default. 349 flags.IntVar(&gomegaformat.MaxLength, "gomega-max-length", 8000, "Sets the maximum size for the gomega formatter (= gomega.MaxLength). Use 0 to disable truncation.") 350 351 flags.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all' framework will be monitoring resource usage of system all add-ons in (some) e2e tests, if set to 'master' framework will be monitoring master node only, if set to 'none' of 'false' monitoring will be turned off.") 352 flags.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.") 353 flags.IntVar(&TestContext.MaxNodesToGather, "max-nodes-to-gather-from", 20, "The maximum number of nodes to gather extended info from on test failure.") 354 flags.StringVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", "false", "If set to 'true' framework will gather metrics from all components after each test. If set to 'master' only master component metrics would be gathered.") 355 flags.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after the whole test suite completes.") 356 flags.BoolVar(&TestContext.IncludeClusterAutoscalerMetrics, "include-cluster-autoscaler", false, "If set to true, framework will include Cluster Autoscaler when gathering metrics.") 357 flags.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.") 358 flags.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.") 359 flags.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after test run.") 360 flags.StringVar(&TestContext.LogexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty.") 361 flags.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.") 362 flags.BoolVar(&TestContext.DeleteNamespaceOnFailure, "delete-namespace-on-failure", true, "If true, framework will delete test namespace on failure. Used only during test debugging.") 363 flags.IntVar(&TestContext.AllowedNotReadyNodes, "allowed-not-ready-nodes", 0, "If greater than zero, framework will allow for that many non-ready nodes when checking for all ready nodes. If -1, no waiting will be performed for ready nodes or daemonset pods.") 364 365 flags.StringVar(&TestContext.Host, "host", "", fmt.Sprintf("The host, or apiserver, to connect to. Will default to %s if this argument and --kubeconfig are not set.", defaultHost)) 366 flags.StringVar(&TestContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.") 367 flags.StringVar(&TestContext.ReportDir, "report-dir", "", "Path to the directory where the simplified JUnit XML reports and other tests results should be saved. Default is empty, which doesn't generate these reports. If ginkgo's -junit-report parameter is used, that parameter instead of -report-dir determines the location of a single JUnit report.") 368 flags.BoolVar(&TestContext.ReportCompleteGinkgo, "report-complete-ginkgo", false, "Enables writing a complete test report as Ginkgo JSON to <report dir>/ginkgo/report.json. Ignored if --report-dir is not set.") 369 flags.BoolVar(&TestContext.ReportCompleteJUnit, "report-complete-junit", false, "Enables writing a complete test report as JUnit XML to <report dir>/ginkgo/report.json. Ignored if --report-dir is not set.") 370 flags.StringVar(&TestContext.ContainerRuntimeEndpoint, "container-runtime-endpoint", "unix:///run/containerd/containerd.sock", "The container runtime endpoint of cluster VM instances.") 371 flags.StringVar(&TestContext.ContainerRuntimeProcessName, "container-runtime-process-name", "containerd", "The name of the container runtime process.") 372 flags.StringVar(&TestContext.ContainerRuntimePidFile, "container-runtime-pid-file", "/run/containerd/containerd.pid", "The pid file of the container runtime.") 373 flags.StringVar(&TestContext.SystemdServices, "systemd-services", "containerd*", "The comma separated list of systemd services the framework will dump logs for.") 374 flags.BoolVar(&TestContext.DumpSystemdJournal, "dump-systemd-journal", false, "Whether to dump the full systemd journal.") 375 flags.StringVar(&TestContext.ImageServiceEndpoint, "image-service-endpoint", "", "The image service endpoint of cluster VM instances.") 376 flags.StringVar(&TestContext.NonblockingTaints, "non-blocking-taints", `node-role.kubernetes.io/control-plane`, "Nodes with taints in this comma-delimited list will not block the test framework from starting tests.") 377 378 flags.BoolVar(&TestContext.ListImages, "list-images", false, "If true, will show list of images used for running tests.") 379 flags.BoolVar(&TestContext.listLabels, "list-labels", false, "If true, will show the list of labels that can be used to select tests via -ginkgo.label-filter.") 380 flags.BoolVar(&TestContext.listTests, "list-tests", false, "If true, will show the full names of all tests (aka specs) that can be used to select test via -ginkgo.focus/skip.") 381 flags.StringVar(&TestContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.") 382 383 flags.StringVar(&TestContext.ProgressReportURL, "progress-report-url", "", "The URL to POST progress updates to as the suite runs to assist in aiding integrations. If empty, no messages sent.") 384 flags.StringVar(&TestContext.SpecSummaryOutput, "spec-dump", "", "The file to dump all ginkgo.SpecSummary to after tests run. If empty, no objects are saved/printed.") 385 flags.StringVar(&TestContext.DockerConfigFile, "docker-config-file", "", "A docker credential file which contains authorization token that is used to perform image pull tests from an authenticated registry. For more details regarding the content of the file refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub") 386 387 flags.StringVar(&TestContext.E2EDockerConfigFile, "e2e-docker-config-file", "", "A docker credentials configuration file used which contains authorization token that can be used to pull images from certain private registries provided by the users. For more details refer https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#log-in-to-docker-hub") 388 flags.StringVar(&TestContext.KubeTestRepoList, "kube-test-repo-list", "", "A yaml file used for overriding registries for test images. Alternatively, the KUBE_TEST_REPO_LIST env variable can be set.") 389 390 flags.StringVar(&TestContext.SnapshotControllerPodName, "snapshot-controller-pod-name", "", "The pod name to use for identifying the snapshot controller in the kube-system namespace.") 391 flags.IntVar(&TestContext.SnapshotControllerHTTPPort, "snapshot-controller-http-port", 0, "The port to use for snapshot controller HTTP communication.") 392 393 flags.Var(&stringArrayValue{&TestContext.EnabledVolumeDrivers}, "enabled-volume-drivers", "Comma-separated list of in-tree volume drivers to enable for testing. This is only needed for in-tree drivers disabled by default. An example is gcepd; see test/e2e/storage/in_tree_volumes.go for full details.") 394 } 395 396 func CreateGinkgoConfig() (types.SuiteConfig, types.ReporterConfig) { 397 // fetch the current config 398 suiteConfig, reporterConfig := ginkgo.GinkgoConfiguration() 399 // Randomize specs as well as suites 400 suiteConfig.RandomizeAllSpecs = true 401 // Disable skipped tests unless they are explicitly requested. 402 if len(suiteConfig.FocusStrings) == 0 && len(suiteConfig.SkipStrings) == 0 { 403 suiteConfig.SkipStrings = []string{`\[Flaky\]|\[Feature:.+\]`} 404 } 405 return suiteConfig, reporterConfig 406 } 407 408 // RegisterClusterFlags registers flags specific to the cluster e2e test suite. 409 func RegisterClusterFlags(flags *flag.FlagSet) { 410 flags.BoolVar(&TestContext.VerifyServiceAccount, "e2e-verify-service-account", true, "If true tests will verify the service account before running.") 411 flags.StringVar(&TestContext.KubeConfig, clientcmd.RecommendedConfigPathFlag, os.Getenv(clientcmd.RecommendedConfigPathEnvVar), "Path to kubeconfig containing embedded authinfo.") 412 flags.StringVar(&TestContext.KubeContext, clientcmd.FlagContext, "", "kubeconfig context to use/override. If unset, will use value from 'current-context'") 413 flags.StringVar(&TestContext.KubeAPIContentType, "kube-api-content-type", "application/vnd.kubernetes.protobuf", "ContentType used to communicate with apiserver") 414 415 flags.StringVar(&TestContext.KubeletRootDir, "kubelet-root-dir", "/var/lib/kubelet", "The data directory of kubelet. Some tests (for example, CSI storage tests) deploy DaemonSets which need to know this value and cannot query it. Such tests only work in clusters where the data directory is the same on all nodes.") 416 flags.StringVar(&TestContext.KubeletRootDir, "volume-dir", "/var/lib/kubelet", "An alias for --kubelet-root-dir, kept for backwards compatibility.") 417 flags.StringVar(&TestContext.CertDir, "cert-dir", "", "Path to the directory containing the certs. Default is empty, which doesn't use certs.") 418 flags.StringVar(&TestContext.RepoRoot, "repo-root", "../../", "Root directory of kubernetes repository, for finding test files.") 419 // NOTE: Node E2E tests have this flag defined as well, but true by default. 420 // If this becomes true as well, they should be refactored into RegisterCommonFlags. 421 flags.BoolVar(&TestContext.PrepullImages, "prepull-images", false, "If true, prepull images so image pull failures do not cause test failures.") 422 flags.StringVar(&TestContext.Provider, "provider", "", "The name of the Kubernetes provider (gce, gke, local, skeleton (the fallback if not set), etc.)") 423 flags.StringVar(&TestContext.Tooling, "tooling", "", "The tooling in use (kops, gke, etc.)") 424 flags.StringVar(&TestContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.") 425 flags.StringVar(&TestContext.Prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.") 426 flags.StringVar(&TestContext.MasterOSDistro, "master-os-distro", "debian", "The OS distribution of cluster master (debian, ubuntu, gci, coreos, or custom).") 427 flags.StringVar(&TestContext.NodeOSDistro, "node-os-distro", "debian", "The OS distribution of cluster VM instances (debian, ubuntu, gci, coreos, windows, or custom), which determines how specific tests are implemented.") 428 flags.StringVar(&TestContext.NodeOSArch, "node-os-arch", "amd64", "The OS architecture of cluster VM instances (amd64, arm64, or custom).") 429 flags.StringVar(&TestContext.ClusterDNSDomain, "dns-domain", "cluster.local", "The DNS Domain of the cluster.") 430 431 // TODO: Flags per provider? Rename gce-project/gce-zone? 432 cloudConfig := &TestContext.CloudConfig 433 flags.StringVar(&cloudConfig.MasterName, "kube-master", "", "Name of the kubernetes master. Only required if provider is gce or gke") 434 flags.StringVar(&cloudConfig.APIEndpoint, "gce-api-endpoint", "", "The GCE APIEndpoint being used, if applicable") 435 flags.StringVar(&cloudConfig.ProjectID, "gce-project", "", "The GCE project being used, if applicable") 436 flags.StringVar(&cloudConfig.Zone, "gce-zone", "", "GCE zone being used, if applicable") 437 flags.Var(cliflag.NewStringSlice(&cloudConfig.Zones), "gce-zones", "The set of zones to use in a multi-zone test instead of querying the cloud provider.") 438 flags.StringVar(&cloudConfig.Region, "gce-region", "", "GCE region being used, if applicable") 439 flags.BoolVar(&cloudConfig.MultiZone, "gce-multizone", false, "If true, start GCE cloud provider with multizone support.") 440 flags.BoolVar(&cloudConfig.MultiMaster, "gce-multimaster", false, "If true, the underlying GCE/GKE cluster is assumed to be multi-master.") 441 flags.StringVar(&cloudConfig.Cluster, "gke-cluster", "", "GKE name of cluster being used, if applicable") 442 flags.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws. If there is more than one group: comma separated list of groups.") 443 flags.StringVar(&cloudConfig.Network, "network", "e2e", "The cloud provider network for this e2e cluster.") 444 flags.IntVar(&cloudConfig.NumNodes, "num-nodes", DefaultNumNodes, fmt.Sprintf("Number of nodes in the cluster. If the default value of '%q' is used the number of schedulable nodes is auto-detected.", DefaultNumNodes)) 445 flags.StringVar(&cloudConfig.ClusterIPRange, "cluster-ip-range", "10.64.0.0/14", "A CIDR notation IP range from which to assign IPs in the cluster.") 446 flags.StringVar(&cloudConfig.NodeTag, "node-tag", "", "Network tags used on node instances. Valid only for gce, gke") 447 flags.StringVar(&cloudConfig.MasterTag, "master-tag", "", "Network tags used on master instances. Valid only for gce, gke") 448 449 flags.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources. Only required if provider is aws.") 450 flags.StringVar(&cloudConfig.ConfigFile, "cloud-config-file", "", "Cloud config file. Only required if provider is azure or vsphere.") 451 flags.IntVar(&TestContext.MinStartupPods, "minStartupPods", 0, "The number of pods which we need to see in 'Running' state with a 'Ready' condition of true, before we try running tests. This is useful in any cluster which needs some base pod-based services running before it can be used. If set to -1, no pods are checked and tests run straight away.") 452 flags.DurationVar(&TestContext.timeouts.SystemPodsStartup, "system-pods-startup-timeout", TestContext.timeouts.SystemPodsStartup, "Timeout for waiting for all system pods to be running before starting tests.") 453 flags.DurationVar(&TestContext.timeouts.NodeSchedulable, "node-schedulable-timeout", TestContext.timeouts.NodeSchedulable, "Timeout for waiting for all nodes to be schedulable.") 454 flags.DurationVar(&TestContext.timeouts.SystemDaemonsetStartup, "system-daemonsets-startup-timeout", TestContext.timeouts.SystemDaemonsetStartup, "Timeout for waiting for all system daemonsets to be ready.") 455 flags.StringVar(&TestContext.EtcdUpgradeStorage, "etcd-upgrade-storage", "", "The storage version to upgrade to (either 'etcdv2' or 'etcdv3') if doing an etcd upgrade test.") 456 flags.StringVar(&TestContext.EtcdUpgradeVersion, "etcd-upgrade-version", "", "The etcd binary version to upgrade to (e.g., '3.0.14', '2.3.7') if doing an etcd upgrade test.") 457 flags.StringVar(&TestContext.GCEUpgradeScript, "gce-upgrade-script", "", "Script to use to upgrade a GCE cluster.") 458 flags.BoolVar(&TestContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to Cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.") 459 460 nodeKiller := &TestContext.NodeKiller 461 flags.BoolVar(&nodeKiller.Enabled, "node-killer", false, "Whether NodeKiller should kill any nodes.") 462 flags.Float64Var(&nodeKiller.FailureRatio, "node-killer-failure-ratio", 0.01, "Percentage of nodes to be killed") 463 flags.DurationVar(&nodeKiller.Interval, "node-killer-interval", 1*time.Minute, "Time between node failures.") 464 flags.Float64Var(&nodeKiller.JitterFactor, "node-killer-jitter-factor", 60, "Factor used to jitter node failures.") 465 flags.DurationVar(&nodeKiller.SimulatedDowntime, "node-killer-simulated-downtime", 10*time.Minute, "A delay between node death and recreation") 466 } 467 468 // GenerateSecureToken returns a string of length tokenLen, consisting 469 // of random bytes encoded as base64 for use as a Bearer Token during 470 // communication with an APIServer 471 func GenerateSecureToken(tokenLen int) (string, error) { 472 // Number of bytes to be tokenLen when base64 encoded. 473 tokenSize := math.Ceil(float64(tokenLen) * 6 / 8) 474 rawToken := make([]byte, int(tokenSize)) 475 if _, err := rand.Read(rawToken); err != nil { 476 return "", err 477 } 478 encoded := base64.RawURLEncoding.EncodeToString(rawToken) 479 token := encoded[:tokenLen] 480 return token, nil 481 } 482 483 // AfterReadingAllFlags makes changes to the context after all flags 484 // have been read and prepares the process for a test run. 485 func AfterReadingAllFlags(t *TestContextType) { 486 // Reconfigure klog so that output goes to the GinkgoWriter instead 487 // of stderr. The advantage is that it then gets interleaved properly 488 // with output that goes to GinkgoWriter (By, Logf). 489 490 // These flags are not exposed via the normal command line flag set, 491 // therefore we have to use our own private one here. 492 if t.KubeTestRepoList != "" { 493 image.Init(t.KubeTestRepoList) 494 } 495 var fs flag.FlagSet 496 klog.InitFlags(&fs) 497 fs.Set("logtostderr", "false") 498 fs.Set("alsologtostderr", "false") 499 fs.Set("one_output", "true") 500 fs.Set("stderrthreshold", "10" /* higher than any of the severities -> none pass the threshold */) 501 klog.SetOutput(ginkgo.GinkgoWriter) 502 503 if t.ListImages { 504 for _, v := range image.GetImageConfigs() { 505 fmt.Println(v.GetE2EImage()) 506 } 507 Exit(0) 508 } 509 510 // Reconfigure gomega defaults. The poll interval should be suitable 511 // for most tests. The timeouts are more subjective and tests may want 512 // to override them, but these defaults are still better for E2E than the 513 // ones from Gomega (1s timeout, 10ms interval). 514 gomega.SetDefaultEventuallyPollingInterval(t.timeouts.Poll) 515 gomega.SetDefaultConsistentlyPollingInterval(t.timeouts.Poll) 516 gomega.SetDefaultEventuallyTimeout(t.timeouts.PodStart) 517 gomega.SetDefaultConsistentlyDuration(t.timeouts.PodStartShort) 518 519 // ginkgo.PreviewSpecs will expand all nodes and thus may find new bugs. 520 report := ginkgo.PreviewSpecs("Kubernetes e2e test statistics") 521 validateSpecs(report.SpecReports) 522 if err := FormatBugs(); CheckForBugs && err != nil { 523 // Refuse to do anything if the E2E suite is buggy. 524 fmt.Fprint(Output, "ERROR: E2E suite initialization was faulty, these errors must be fixed:") 525 fmt.Fprint(Output, "\n"+err.Error()) 526 Exit(1) 527 } 528 if t.listLabels || t.listTests { 529 listTestInformation(report) 530 Exit(0) 531 } 532 533 // Only set a default host if one won't be supplied via kubeconfig 534 if len(t.Host) == 0 && len(t.KubeConfig) == 0 { 535 // Check if we can use the in-cluster config 536 if clusterConfig, err := restclient.InClusterConfig(); err == nil { 537 if tempFile, err := os.CreateTemp(os.TempDir(), "kubeconfig-"); err == nil { 538 kubeConfig := kubeconfig.CreateKubeConfig(clusterConfig) 539 clientcmd.WriteToFile(*kubeConfig, tempFile.Name()) 540 t.KubeConfig = tempFile.Name() 541 klog.V(4).Infof("Using a temporary kubeconfig file from in-cluster config : %s", tempFile.Name()) 542 } 543 } 544 if len(t.KubeConfig) == 0 { 545 klog.Warningf("Unable to find in-cluster config, using default host : %s", defaultHost) 546 t.Host = defaultHost 547 } 548 } 549 if len(t.BearerToken) == 0 { 550 var err error 551 t.BearerToken, err = GenerateSecureToken(16) 552 if err != nil { 553 klog.Fatalf("Failed to generate bearer token: %v", err) 554 } 555 } 556 557 // Allow 1% of nodes to be unready (statistically) - relevant for large clusters. 558 if t.AllowedNotReadyNodes == 0 { 559 t.AllowedNotReadyNodes = t.CloudConfig.NumNodes / 100 560 } 561 562 klog.V(4).Infof("Tolerating taints %q when considering if nodes are ready", TestContext.NonblockingTaints) 563 564 // Make sure that all test runs have a valid TestContext.CloudConfig.Provider. 565 // TODO: whether and how long this code is needed is getting discussed 566 // in https://github.com/kubernetes/kubernetes/issues/70194. 567 if TestContext.Provider == "" { 568 // Some users of the e2e.test binary pass --provider=. 569 // We need to support that, changing it would break those usages. 570 Logf("The --provider flag is not set. Continuing as if --provider=skeleton had been used.") 571 TestContext.Provider = "skeleton" 572 } 573 574 var err error 575 TestContext.CloudConfig.Provider, err = SetupProviderConfig(TestContext.Provider) 576 if err != nil { 577 if os.IsNotExist(errors.Unwrap(err)) { 578 // Provide a more helpful error message when the provider is unknown. 579 var providers []string 580 for _, name := range GetProviders() { 581 // The empty string is accepted, but looks odd in the output below unless we quote it. 582 if name == "" { 583 name = `""` 584 } 585 providers = append(providers, name) 586 } 587 sort.Strings(providers) 588 klog.Errorf("Unknown provider %q. The following providers are known: %v", TestContext.Provider, strings.Join(providers, " ")) 589 } else { 590 klog.Errorf("Failed to setup provider config for %q: %v", TestContext.Provider, err) 591 } 592 Exit(1) 593 } 594 595 if TestContext.ReportDir != "" { 596 // Create the directory before running the suite. If 597 // --report-dir is not unusable, we should report 598 // that as soon as possible. This will be done by each worker 599 // in parallel, so we will get "exists" error in most of them. 600 if err := os.MkdirAll(TestContext.ReportDir, 0777); err != nil && !os.IsExist(err) { 601 klog.Errorf("Create report dir: %v", err) 602 Exit(1) 603 } 604 ginkgoDir := path.Join(TestContext.ReportDir, "ginkgo") 605 if TestContext.ReportCompleteGinkgo || TestContext.ReportCompleteJUnit { 606 if err := os.MkdirAll(ginkgoDir, 0777); err != nil && !os.IsExist(err) { 607 klog.Errorf("Create <report-dir>/ginkgo: %v", err) 608 Exit(1) 609 } 610 } 611 612 if TestContext.ReportCompleteGinkgo { 613 ginkgo.ReportAfterSuite("Ginkgo JSON report", func(report ginkgo.Report) { 614 ExpectNoError(reporters.GenerateJSONReport(report, path.Join(ginkgoDir, "report.json"))) 615 }) 616 ginkgo.ReportAfterSuite("JUnit XML report", func(report ginkgo.Report) { 617 ExpectNoError(reporters.GenerateJUnitReport(report, path.Join(ginkgoDir, "report.xml"))) 618 }) 619 } 620 621 ginkgo.ReportAfterSuite("Kubernetes e2e JUnit report", func(report ginkgo.Report) { 622 // With Ginkgo v1, we used to write one file per 623 // parallel node. Now Ginkgo v2 automatically merges 624 // all results into a report for us. The 01 suffix is 625 // kept in case that users expect files to be called 626 // "junit_<prefix><number>.xml". 627 junitReport := path.Join(TestContext.ReportDir, "junit_"+TestContext.ReportPrefix+"01.xml") 628 629 // writeJUnitReport generates a JUnit file in the e2e 630 // report directory that is shorter than the one 631 // normally written by `ginkgo --junit-report`. This is 632 // needed because the full report can become too large 633 // for tools like Spyglass 634 // (https://github.com/kubernetes/kubernetes/issues/111510). 635 ExpectNoError(junit.WriteJUnitReport(report, junitReport)) 636 }) 637 } 638 } 639 640 func listTestInformation(report ginkgo.Report) { 641 indent := strings.Repeat(" ", 4) 642 643 if TestContext.listLabels { 644 labels := sets.New[string]() 645 for _, spec := range report.SpecReports { 646 if spec.LeafNodeType == types.NodeTypeIt { 647 labels.Insert(spec.Labels()...) 648 } 649 } 650 fmt.Fprintf(Output, "The following labels can be used with 'gingko run --label-filter':\n%s%s\n\n", indent, strings.Join(sets.List(labels), "\n"+indent)) 651 } 652 if TestContext.listTests { 653 leafs := make([][]string, 0, len(report.SpecReports)) 654 wd, _ := os.Getwd() 655 for _, spec := range report.SpecReports { 656 if spec.LeafNodeType == types.NodeTypeIt { 657 leafs = append(leafs, []string{fmt.Sprintf("%s:%d: ", relativePath(wd, spec.LeafNodeLocation.FileName), spec.LeafNodeLocation.LineNumber), spec.FullText()}) 658 } 659 } 660 // Sort by test name, not the source code location, because the test 661 // name is more stable across code refactoring. 662 sort.Slice(leafs, func(i, j int) bool { 663 return leafs[i][1] < leafs[j][1] 664 }) 665 fmt.Fprint(Output, "The following spec names can be used with 'ginkgo run --focus/skip':\n") 666 for _, leaf := range leafs { 667 fmt.Fprintf(Output, "%s%s%s\n", indent, leaf[0], leaf[1]) 668 } 669 fmt.Fprint(Output, "\n") 670 } 671 } 672 673 func relativePath(wd, path string) string { 674 if wd == "" { 675 return path 676 } 677 relpath, err := filepath.Rel(wd, path) 678 if err != nil { 679 return path 680 } 681 return relpath 682 }