/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/url"
	"os"
	"os/exec"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/dynamic"
	clientset "k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
	watchtools "k8s.io/client-go/tools/watch"
	imageutils "k8s.io/kubernetes/test/utils/image"
	netutils "k8s.io/utils/net"
)

const (
	// awsMasterIP is the address GetControlPlaneAddresses reports for the
	// "aws" provider.
	// TODO(justinsb): Avoid hardcoding this.
	awsMasterIP = "172.20.0.9"
)

// DEPRECATED constants. Use the timeouts in framework.Framework instead.
const (
	// PodListTimeout is how long to wait for the pod to be listable.
	PodListTimeout = time.Minute

	// PodStartTimeout is how long to wait for the pod to be started.
	PodStartTimeout = 5 * time.Minute

	// PodStartShortTimeout is same as `PodStartTimeout` to wait for the pod to be started, but shorter.
	// Use it case by case when we are sure pod start will not be delayed for
	// minutes by slow docker pulls or something else.
	PodStartShortTimeout = 2 * time.Minute

	// PodDeleteTimeout is how long to wait for a pod to be deleted.
	PodDeleteTimeout = 5 * time.Minute

	// PodGetTimeout is how long to wait for a pod to be got.
	PodGetTimeout = 2 * time.Minute

	// PodEventTimeout is how much we wait for a pod event to occur.
	PodEventTimeout = 2 * time.Minute

	// ServiceStartTimeout is how long to wait for a service endpoint to be resolvable.
	ServiceStartTimeout = 3 * time.Minute

	// Poll is how often to Poll pods, nodes and claims.
	Poll = 2 * time.Second

	// PollShortTimeout is the short timeout value in polling.
	PollShortTimeout = 1 * time.Minute

	// ServiceAccountProvisionTimeout is how long to wait for a service account to be provisioned.
	// Service accounts are provisioned after namespace creation;
	// a service account is required to support pod creation in a namespace as part of admission control.
	ServiceAccountProvisionTimeout = 2 * time.Minute

	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	SingleCallTimeout = 5 * time.Minute

	// NodeReadyInitialTimeout is how long nodes have to be "ready" when a test begins. They should already
	// be "ready" before the test starts, so this is small.
	NodeReadyInitialTimeout = 20 * time.Second

	// PodReadyBeforeTimeout is how long pods have to be "ready" when a test begins.
	PodReadyBeforeTimeout = 5 * time.Minute

	// ClaimProvisionShortTimeout is same as `ClaimProvisionTimeout` to wait for claim to be dynamically provisioned, but shorter.
	// Use it case by case when we are sure this timeout is enough.
	ClaimProvisionShortTimeout = 1 * time.Minute

	// ClaimProvisionTimeout is how long claims have to become dynamically provisioned.
	ClaimProvisionTimeout = 5 * time.Minute

	// RestartNodeReadyAgainTimeout is how long a node is allowed to become "Ready" after it is restarted before
	// the test is considered failed.
	RestartNodeReadyAgainTimeout = 5 * time.Minute

	// RestartPodReadyAgainTimeout is how long a pod is allowed to become "running" and "ready" after a node
	// restart before test is considered failed.
	RestartPodReadyAgainTimeout = 5 * time.Minute

	// SnapshotCreateTimeout is how long for snapshot to create snapshotContent.
	SnapshotCreateTimeout = 5 * time.Minute

	// SnapshotDeleteTimeout is how long for snapshot to delete snapshotContent.
	SnapshotDeleteTimeout = 5 * time.Minute
)

var (
	// BusyBoxImage is the image URI of BusyBox.
	BusyBoxImage = imageutils.GetE2EImage(imageutils.BusyBox)

	// ProvidersWithSSH are those providers where each node is accessible with SSH
	ProvidersWithSSH = []string{"gce", "gke", "aws", "local", "azure"}

	// ServeHostnameImage is a serve hostname image name.
	ServeHostnameImage = imageutils.GetE2EImage(imageutils.Agnhost)
)

// RunID is a unique identifier of the e2e run.
// Beware that this ID is not the same for all tests in the e2e run, because each Ginkgo node creates it separately.
var RunID = uuid.NewUUID()

// CreateTestingNSFn is a func that is responsible for creating namespace used for executing e2e tests.
150 type CreateTestingNSFn func(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) 151 152 // APIAddress returns a address of an instance. 153 func APIAddress() string { 154 instanceURL, err := url.Parse(TestContext.Host) 155 ExpectNoError(err) 156 return instanceURL.Hostname() 157 } 158 159 // ProviderIs returns true if the provider is included is the providers. Otherwise false. 160 func ProviderIs(providers ...string) bool { 161 for _, provider := range providers { 162 if strings.EqualFold(provider, TestContext.Provider) { 163 return true 164 } 165 } 166 return false 167 } 168 169 // MasterOSDistroIs returns true if the master OS distro is included in the supportedMasterOsDistros. Otherwise false. 170 func MasterOSDistroIs(supportedMasterOsDistros ...string) bool { 171 for _, distro := range supportedMasterOsDistros { 172 if strings.EqualFold(distro, TestContext.MasterOSDistro) { 173 return true 174 } 175 } 176 return false 177 } 178 179 // NodeOSDistroIs returns true if the node OS distro is included in the supportedNodeOsDistros. Otherwise false. 180 func NodeOSDistroIs(supportedNodeOsDistros ...string) bool { 181 for _, distro := range supportedNodeOsDistros { 182 if strings.EqualFold(distro, TestContext.NodeOSDistro) { 183 return true 184 } 185 } 186 return false 187 } 188 189 // NodeOSArchIs returns true if the node OS arch is included in the supportedNodeOsArchs. Otherwise false. 190 func NodeOSArchIs(supportedNodeOsArchs ...string) bool { 191 for _, arch := range supportedNodeOsArchs { 192 if strings.EqualFold(arch, TestContext.NodeOSArch) { 193 return true 194 } 195 } 196 return false 197 } 198 199 // DeleteNamespaces deletes all namespaces that match the given delete and skip filters. 200 // Filter is by simple strings.Contains; first skip filter, then delete filter. 201 // Returns the list of deleted namespaces or an error. 
202 func DeleteNamespaces(ctx context.Context, c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) { 203 ginkgo.By("Deleting namespaces") 204 nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) 205 ExpectNoError(err, "Failed to get namespace list") 206 var deleted []string 207 var wg sync.WaitGroup 208 OUTER: 209 for _, item := range nsList.Items { 210 for _, pattern := range skipFilter { 211 if strings.Contains(item.Name, pattern) { 212 continue OUTER 213 } 214 } 215 if deleteFilter != nil { 216 var shouldDelete bool 217 for _, pattern := range deleteFilter { 218 if strings.Contains(item.Name, pattern) { 219 shouldDelete = true 220 break 221 } 222 } 223 if !shouldDelete { 224 continue OUTER 225 } 226 } 227 wg.Add(1) 228 deleted = append(deleted, item.Name) 229 go func(nsName string) { 230 defer wg.Done() 231 defer ginkgo.GinkgoRecover() 232 gomega.Expect(c.CoreV1().Namespaces().Delete(ctx, nsName, metav1.DeleteOptions{})).To(gomega.Succeed()) 233 Logf("namespace : %v api call to delete is complete ", nsName) 234 }(item.Name) 235 } 236 wg.Wait() 237 return deleted, nil 238 } 239 240 // WaitForNamespacesDeleted waits for the namespaces to be deleted. 241 func WaitForNamespacesDeleted(ctx context.Context, c clientset.Interface, namespaces []string, timeout time.Duration) error { 242 ginkgo.By(fmt.Sprintf("Waiting for namespaces %+v to vanish", namespaces)) 243 nsMap := map[string]bool{} 244 for _, ns := range namespaces { 245 nsMap[ns] = true 246 } 247 //Now POLL until all namespaces have been eradicated. 
248 return wait.PollWithContext(ctx, 2*time.Second, timeout, 249 func(ctx context.Context) (bool, error) { 250 nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) 251 if err != nil { 252 return false, err 253 } 254 for _, item := range nsList.Items { 255 if _, ok := nsMap[item.Name]; ok { 256 return false, nil 257 } 258 } 259 return true, nil 260 }) 261 } 262 263 func waitForConfigMapInNamespace(ctx context.Context, c clientset.Interface, ns, name string, timeout time.Duration) error { 264 fieldSelector := fields.OneTermEqualSelector("metadata.name", name).String() 265 ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout) 266 defer cancel() 267 lw := &cache.ListWatch{ 268 ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) { 269 options.FieldSelector = fieldSelector 270 return c.CoreV1().ConfigMaps(ns).List(ctx, options) 271 }, 272 WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) { 273 options.FieldSelector = fieldSelector 274 return c.CoreV1().ConfigMaps(ns).Watch(ctx, options) 275 }, 276 } 277 _, err := watchtools.UntilWithSync(ctx, lw, &v1.ConfigMap{}, nil, func(event watch.Event) (bool, error) { 278 switch event.Type { 279 case watch.Deleted: 280 return false, apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, name) 281 case watch.Added, watch.Modified: 282 return true, nil 283 } 284 return false, nil 285 }) 286 return err 287 } 288 289 func waitForServiceAccountInNamespace(ctx context.Context, c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error { 290 fieldSelector := fields.OneTermEqualSelector("metadata.name", serviceAccountName).String() 291 ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout) 292 defer cancel() 293 lw := &cache.ListWatch{ 294 ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) { 295 options.FieldSelector = fieldSelector 296 return c.CoreV1().ServiceAccounts(ns).List(ctx, 
options) 297 }, 298 WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) { 299 options.FieldSelector = fieldSelector 300 return c.CoreV1().ServiceAccounts(ns).Watch(ctx, options) 301 }, 302 } 303 _, err := watchtools.UntilWithSync(ctx, lw, &v1.ServiceAccount{}, nil, func(event watch.Event) (bool, error) { 304 switch event.Type { 305 case watch.Deleted: 306 return false, apierrors.NewNotFound(schema.GroupResource{Resource: "serviceaccounts"}, serviceAccountName) 307 case watch.Added, watch.Modified: 308 return true, nil 309 } 310 return false, nil 311 }) 312 if err != nil { 313 return fmt.Errorf("wait for service account %q in namespace %q: %w", serviceAccountName, ns, err) 314 } 315 return nil 316 } 317 318 // WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned 319 // the default service account is what is associated with pods when they do not specify a service account 320 // as a result, pods are not able to be provisioned in a namespace until the service account is provisioned 321 func WaitForDefaultServiceAccountInNamespace(ctx context.Context, c clientset.Interface, namespace string) error { 322 return waitForServiceAccountInNamespace(ctx, c, namespace, defaultServiceAccountName, ServiceAccountProvisionTimeout) 323 } 324 325 // WaitForKubeRootCAInNamespace waits for the configmap kube-root-ca.crt containing the service account 326 // CA trust bundle to be provisioned in the specified namespace so that pods do not have to retry mounting 327 // the config map (which creates noise that hides other issues in the Kubelet). 328 func WaitForKubeRootCAInNamespace(ctx context.Context, c clientset.Interface, namespace string) error { 329 return waitForConfigMapInNamespace(ctx, c, namespace, "kube-root-ca.crt", ServiceAccountProvisionTimeout) 330 } 331 332 // CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name. 
333 // Please see NewFramework instead of using this directly. 334 func CreateTestingNS(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) { 335 if labels == nil { 336 labels = map[string]string{} 337 } 338 labels["e2e-run"] = string(RunID) 339 340 // We don't use ObjectMeta.GenerateName feature, as in case of API call 341 // failure we don't know whether the namespace was created and what is its 342 // name. 343 name := fmt.Sprintf("%v-%v", baseName, RandomSuffix()) 344 345 namespaceObj := &v1.Namespace{ 346 ObjectMeta: metav1.ObjectMeta{ 347 Name: name, 348 Namespace: "", 349 Labels: labels, 350 }, 351 Status: v1.NamespaceStatus{}, 352 } 353 // Be robust about making the namespace creation call. 354 var got *v1.Namespace 355 if err := wait.PollUntilContextTimeout(ctx, Poll, 30*time.Second, true, func(ctx context.Context) (bool, error) { 356 var err error 357 got, err = c.CoreV1().Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{}) 358 if err != nil { 359 if apierrors.IsAlreadyExists(err) { 360 // regenerate on conflict 361 Logf("Namespace name %q was already taken, generate a new name and retry", namespaceObj.Name) 362 namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, RandomSuffix()) 363 } else { 364 Logf("Unexpected error while creating namespace: %v", err) 365 } 366 return false, nil 367 } 368 return true, nil 369 }); err != nil { 370 return nil, err 371 } 372 373 if TestContext.VerifyServiceAccount { 374 if err := WaitForDefaultServiceAccountInNamespace(ctx, c, got.Name); err != nil { 375 // Even if we fail to create serviceAccount in the namespace, 376 // we have successfully create a namespace. 377 // So, return the created namespace. 378 return got, err 379 } 380 } 381 return got, nil 382 } 383 384 // CheckTestingNSDeletedExcept checks whether all e2e based existing namespaces are in the Terminating state 385 // and waits until they are finally deleted. It ignores namespace skip. 
386 func CheckTestingNSDeletedExcept(ctx context.Context, c clientset.Interface, skip string) error { 387 // TODO: Since we don't have support for bulk resource deletion in the API, 388 // while deleting a namespace we are deleting all objects from that namespace 389 // one by one (one deletion == one API call). This basically exposes us to 390 // throttling - currently controller-manager has a limit of max 20 QPS. 391 // Once #10217 is implemented and used in namespace-controller, deleting all 392 // object from a given namespace should be much faster and we will be able 393 // to lower this timeout. 394 // However, now Density test is producing ~26000 events and Load capacity test 395 // is producing ~35000 events, thus assuming there are no other requests it will 396 // take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60 397 // minutes to avoid any timeouts here. 398 timeout := 60 * time.Minute 399 400 Logf("Waiting for terminating namespaces to be deleted...") 401 for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) { 402 namespaces, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{}) 403 if err != nil { 404 Logf("Listing namespaces failed: %v", err) 405 continue 406 } 407 terminating := 0 408 for _, ns := range namespaces.Items { 409 if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip { 410 if ns.Status.Phase == v1.NamespaceActive { 411 return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name) 412 } 413 terminating++ 414 } 415 } 416 if terminating == 0 { 417 return nil 418 } 419 } 420 return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out") 421 } 422 423 // WaitForServiceEndpointsNum waits until the amount of endpoints that implement service to expectNum. 424 // Some components use EndpointSlices other Endpoints, we must verify that both objects meet the requirements. 
425 func WaitForServiceEndpointsNum(ctx context.Context, c clientset.Interface, namespace, serviceName string, expectNum int, interval, timeout time.Duration) error { 426 return wait.PollWithContext(ctx, interval, timeout, func(ctx context.Context) (bool, error) { 427 Logf("Waiting for amount of service:%s endpoints to be %d", serviceName, expectNum) 428 endpoint, err := c.CoreV1().Endpoints(namespace).Get(ctx, serviceName, metav1.GetOptions{}) 429 if err != nil { 430 Logf("Unexpected error trying to get Endpoints for %s : %v", serviceName, err) 431 return false, nil 432 } 433 434 if countEndpointsNum(endpoint) != expectNum { 435 Logf("Unexpected number of Endpoints, got %d, expected %d", countEndpointsNum(endpoint), expectNum) 436 return false, nil 437 } 438 439 // Endpoints are single family but EndpointSlices can have dual stack addresses, 440 // so we verify the number of addresses that matches the same family on both. 441 addressType := discoveryv1.AddressTypeIPv4 442 if isIPv6Endpoint(endpoint) { 443 addressType = discoveryv1.AddressTypeIPv6 444 } 445 446 esList, err := c.DiscoveryV1().EndpointSlices(namespace).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discoveryv1.LabelServiceName, serviceName)}) 447 if err != nil { 448 Logf("Unexpected error trying to get EndpointSlices for %s : %v", serviceName, err) 449 return false, nil 450 } 451 452 if len(esList.Items) == 0 { 453 Logf("Waiting for at least 1 EndpointSlice to exist") 454 return false, nil 455 } 456 457 if countEndpointsSlicesNum(esList, addressType) != expectNum { 458 Logf("Unexpected number of Endpoints on Slices, got %d, expected %d", countEndpointsSlicesNum(esList, addressType), expectNum) 459 return false, nil 460 } 461 return true, nil 462 }) 463 } 464 465 func countEndpointsNum(e *v1.Endpoints) int { 466 num := 0 467 for _, sub := range e.Subsets { 468 num += len(sub.Addresses) 469 } 470 return num 471 } 472 473 // isIPv6Endpoint returns true if the Endpoint uses IPv6 
addresses 474 func isIPv6Endpoint(e *v1.Endpoints) bool { 475 for _, sub := range e.Subsets { 476 for _, addr := range sub.Addresses { 477 if len(addr.IP) == 0 { 478 continue 479 } 480 // Endpoints are single family, so it is enough to check only one address 481 return netutils.IsIPv6String(addr.IP) 482 } 483 } 484 // default to IPv4 an Endpoint without IP addresses 485 return false 486 } 487 488 func countEndpointsSlicesNum(epList *discoveryv1.EndpointSliceList, addressType discoveryv1.AddressType) int { 489 // EndpointSlices can contain the same address on multiple Slices 490 addresses := sets.Set[string]{} 491 for _, epSlice := range epList.Items { 492 if epSlice.AddressType != addressType { 493 continue 494 } 495 for _, ep := range epSlice.Endpoints { 496 if len(ep.Addresses) > 0 { 497 addresses.Insert(ep.Addresses[0]) 498 } 499 } 500 } 501 return addresses.Len() 502 } 503 504 // restclientConfig returns a config holds the information needed to build connection to kubernetes clusters. 505 func restclientConfig(kubeContext string) (*clientcmdapi.Config, error) { 506 Logf(">>> kubeConfig: %s", TestContext.KubeConfig) 507 if TestContext.KubeConfig == "" { 508 return nil, fmt.Errorf("KubeConfig must be specified to load client config") 509 } 510 c, err := clientcmd.LoadFromFile(TestContext.KubeConfig) 511 if err != nil { 512 return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error()) 513 } 514 if kubeContext != "" { 515 Logf(">>> kubeContext: %s", kubeContext) 516 c.CurrentContext = kubeContext 517 } 518 return c, nil 519 } 520 521 // ClientConfigGetter is a func that returns getter to return a config. 522 type ClientConfigGetter func() (*restclient.Config, error) 523 524 // LoadConfig returns a config for a rest client with the UserAgent set to include the current test name. 
525 func LoadConfig() (config *restclient.Config, err error) { 526 defer func() { 527 if err == nil && config != nil { 528 testDesc := ginkgo.CurrentSpecReport() 529 if len(testDesc.ContainerHierarchyTexts) > 0 { 530 testName := strings.Join(testDesc.ContainerHierarchyTexts, " ") 531 if len(testDesc.LeafNodeText) > 0 { 532 testName = testName + " " + testDesc.LeafNodeText 533 } 534 config.UserAgent = fmt.Sprintf("%s -- %s", restclient.DefaultKubernetesUserAgent(), testName) 535 } 536 } 537 }() 538 539 if TestContext.NodeE2E { 540 // This is a node e2e test, apply the node e2e configuration 541 return &restclient.Config{ 542 Host: TestContext.Host, 543 BearerToken: TestContext.BearerToken, 544 TLSClientConfig: restclient.TLSClientConfig{ 545 Insecure: true, 546 }, 547 }, nil 548 } 549 c, err := restclientConfig(TestContext.KubeContext) 550 if err != nil { 551 if TestContext.KubeConfig == "" { 552 return restclient.InClusterConfig() 553 } 554 return nil, err 555 } 556 // In case Host is not set in TestContext, sets it as 557 // CurrentContext Server for k8s API client to connect to. 558 if TestContext.Host == "" && c.Clusters != nil { 559 currentContext, ok := c.Clusters[c.CurrentContext] 560 if ok { 561 TestContext.Host = currentContext.Server 562 } 563 } 564 565 return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: TestContext.Host}}).ClientConfig() 566 } 567 568 // LoadClientset returns clientset for connecting to kubernetes clusters. 569 func LoadClientset() (*clientset.Clientset, error) { 570 config, err := LoadConfig() 571 if err != nil { 572 return nil, fmt.Errorf("error creating client: %v", err.Error()) 573 } 574 return clientset.NewForConfig(config) 575 } 576 577 // RandomSuffix provides a random sequence to append to pods,services,rcs. 
578 func RandomSuffix() string { 579 return strconv.Itoa(rand.Intn(10000)) 580 } 581 582 // StartCmdAndStreamOutput returns stdout and stderr after starting the given cmd. 583 func StartCmdAndStreamOutput(cmd *exec.Cmd) (stdout, stderr io.ReadCloser, err error) { 584 stdout, err = cmd.StdoutPipe() 585 if err != nil { 586 return 587 } 588 stderr, err = cmd.StderrPipe() 589 if err != nil { 590 return 591 } 592 Logf("Asynchronously running '%s %s'", cmd.Path, strings.Join(cmd.Args, " ")) 593 err = cmd.Start() 594 return 595 } 596 597 // TryKill is rough equivalent of ctrl+c for cleaning up processes. Intended to be run in defer. 598 func TryKill(cmd *exec.Cmd) { 599 if err := cmd.Process.Kill(); err != nil { 600 Logf("ERROR failed to kill command %v! The process may leak", cmd) 601 } 602 } 603 604 // EnsureLoadBalancerResourcesDeleted ensures that cloud load balancer resources that were created 605 // are actually cleaned up. Currently only implemented for GCE/GKE. 606 func EnsureLoadBalancerResourcesDeleted(ctx context.Context, ip, portRange string) error { 607 return TestContext.CloudConfig.Provider.EnsureLoadBalancerResourcesDeleted(ctx, ip, portRange) 608 } 609 610 // CoreDump SSHs to the master and all nodes and dumps their logs into dir. 611 // It shells out to cluster/log-dump/log-dump.sh to accomplish this. 
612 func CoreDump(dir string) { 613 if TestContext.DisableLogDump { 614 Logf("Skipping dumping logs from cluster") 615 return 616 } 617 var cmd *exec.Cmd 618 if TestContext.LogexporterGCSPath != "" { 619 Logf("Dumping logs from nodes to GCS directly at path: %s", TestContext.LogexporterGCSPath) 620 cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir, TestContext.LogexporterGCSPath) 621 } else { 622 Logf("Dumping logs locally to: %s", dir) 623 cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir) 624 } 625 cmd.Env = append(os.Environ(), fmt.Sprintf("LOG_DUMP_SYSTEMD_SERVICES=%s", parseSystemdServices(TestContext.SystemdServices))) 626 cmd.Env = append(os.Environ(), fmt.Sprintf("LOG_DUMP_SYSTEMD_JOURNAL=%v", TestContext.DumpSystemdJournal)) 627 628 cmd.Stdout = os.Stdout 629 cmd.Stderr = os.Stderr 630 if err := cmd.Run(); err != nil { 631 Logf("Error running cluster/log-dump/log-dump.sh: %v", err) 632 } 633 } 634 635 // parseSystemdServices converts services separator from comma to space. 636 func parseSystemdServices(services string) string { 637 return strings.TrimSpace(strings.Replace(services, ",", " ", -1)) 638 } 639 640 // RunCmd runs cmd using args and returns its stdout and stderr. It also outputs 641 // cmd's stdout and stderr to their respective OS streams. 642 func RunCmd(command string, args ...string) (string, string, error) { 643 return RunCmdEnv(nil, command, args...) 644 } 645 646 // RunCmdEnv runs cmd with the provided environment and args and 647 // returns its stdout and stderr. It also outputs cmd's stdout and 648 // stderr to their respective OS streams. 649 func RunCmdEnv(env []string, command string, args ...string) (string, string, error) { 650 Logf("Running %s %v", command, args) 651 var bout, berr bytes.Buffer 652 cmd := exec.Command(command, args...) 
653 // We also output to the OS stdout/stderr to aid in debugging in case cmd 654 // hangs and never returns before the test gets killed. 655 // 656 // This creates some ugly output because gcloud doesn't always provide 657 // newlines. 658 cmd.Stdout = io.MultiWriter(os.Stdout, &bout) 659 cmd.Stderr = io.MultiWriter(os.Stderr, &berr) 660 cmd.Env = env 661 err := cmd.Run() 662 stdout, stderr := bout.String(), berr.String() 663 if err != nil { 664 return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q", 665 command, args, err, stdout, stderr) 666 } 667 return stdout, stderr, nil 668 } 669 670 // getControlPlaneAddresses returns the externalIP, internalIP and hostname fields of control plane nodes. 671 // If any of these is unavailable, empty slices are returned. 672 func getControlPlaneAddresses(ctx context.Context, c clientset.Interface) ([]string, []string, []string) { 673 var externalIPs, internalIPs, hostnames []string 674 675 // Populate the internal IPs. 676 eps, err := c.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{}) 677 if err != nil { 678 Failf("Failed to get kubernetes endpoints: %v", err) 679 } 680 for _, subset := range eps.Subsets { 681 for _, address := range subset.Addresses { 682 if address.IP != "" { 683 internalIPs = append(internalIPs, address.IP) 684 } 685 } 686 } 687 688 // Populate the external IP/hostname. 689 hostURL, err := url.Parse(TestContext.Host) 690 if err != nil { 691 Failf("Failed to parse hostname: %v", err) 692 } 693 if netutils.ParseIPSloppy(hostURL.Host) != nil { 694 externalIPs = append(externalIPs, hostURL.Host) 695 } else { 696 hostnames = append(hostnames, hostURL.Host) 697 } 698 699 return externalIPs, internalIPs, hostnames 700 } 701 702 // GetControlPlaneAddresses returns all IP addresses on which the kubelet can reach the control plane. 703 // It may return internal and external IPs, even if we expect for 704 // e.g. 
internal IPs to be used (issue #56787), so that we can be 705 // sure to block the control plane fully during tests. 706 func GetControlPlaneAddresses(ctx context.Context, c clientset.Interface) []string { 707 externalIPs, internalIPs, _ := getControlPlaneAddresses(ctx, c) 708 709 ips := sets.NewString() 710 switch TestContext.Provider { 711 case "gce", "gke": 712 for _, ip := range externalIPs { 713 ips.Insert(ip) 714 } 715 for _, ip := range internalIPs { 716 ips.Insert(ip) 717 } 718 case "aws": 719 ips.Insert(awsMasterIP) 720 default: 721 Failf("This test is not supported for provider %s and should be disabled", TestContext.Provider) 722 } 723 return ips.List() 724 } 725 726 // PrettyPrintJSON converts metrics to JSON format. 727 func PrettyPrintJSON(metrics interface{}) string { 728 output := &bytes.Buffer{} 729 if err := json.NewEncoder(output).Encode(metrics); err != nil { 730 Logf("Error building encoder: %v", err) 731 return "" 732 } 733 formatted := &bytes.Buffer{} 734 if err := json.Indent(formatted, output.Bytes(), "", " "); err != nil { 735 Logf("Error indenting: %v", err) 736 return "" 737 } 738 return formatted.String() 739 } 740 741 // WatchEventSequenceVerifier ... 
// manages a watch for a given resource, checks that the expected watch events were observed, and retries the test on failure
//
//	ctx                 cancellation signal across API boundaries, e.g: context from Ginkgo
//	dc                  sets up a client to the API
//	resourceType        specify the type of resource
//	namespace           select a namespace
//	resourceName        the name of the given resource (not referenced by this function's body)
//	listOptions         options used to find the resource, recommended to use listOptions.labelSelector
//	expectedWatchEvents array of events which are expected to occur
//	scenario            the test itself; it receives the watcher and returns the events it observed
//	retryCleanup        a function to run which ensures that there are no dangling resources upon test failure
//
// this tooling relies on the test to return the events as they occur
// the entire scenario must be run to ensure that the desired watch events arrive (allowing for interweaving of watch events)
//
// matching is done on the event Type only: each expected event claims the first
// not-yet-claimed actual event of the same type
// NOTE(review): the matching below does not by itself enforce relative ordering of the events - confirm whether that is intended
//
// if an expected watch event is missing we elect to clean up and run the entire scenario again
// (retryCleanup is called after every attempt, including a successful one)
//
// we try the scenario three times to allow the sequencing to fail a couple of times
func WatchEventSequenceVerifier(ctx context.Context, dc dynamic.Interface, resourceType schema.GroupVersionResource, namespace string, resourceName string, listOptions metav1.ListOptions, expectedWatchEvents []watch.Event, scenario func(*watchtools.RetryWatcher) []watch.Event, retryCleanup func() error) {
	// Only a WatchFunc is configured: the RetryWatcher below starts from an
	// explicit resource version obtained by the List call of each attempt.
	listWatcher := &cache.ListWatch{
		WatchFunc: func(listOptions metav1.ListOptions) (watch.Interface, error) {
			return dc.Resource(resourceType).Namespace(namespace).Watch(ctx, listOptions)
		},
	}

	retries := 3
retriesLoop:
	for try := 1; try <= retries; try++ {
		// List first so the watch can start at the current resource version.
		initResource, err := dc.Resource(resourceType).Namespace(namespace).List(ctx, listOptions)
		ExpectNoError(err, "Failed to fetch initial resource")

		resourceWatch, err := watchtools.NewRetryWatcher(initResource.GetResourceVersion(), listWatcher)
		ExpectNoError(err, "Failed to create a resource watch of %v in namespace %v", resourceType.Resource, namespace)

		// NOTE the test may need access to the events to see what's going on, such as a change in status
		actualWatchEvents := scenario(resourceWatch)
		errs := sets.NewString()
		// Fewer actual than expected events fails immediately, without a retry.
		gomega.Expect(len(expectedWatchEvents)).To(gomega.BeNumerically("<=", len(actualWatchEvents)), "Did not get enough watch events")

		totalValidWatchEvents := 0
		// foundEventIndexes marks actual events already claimed by an expected
		// event; only the nil/non-nil state of the entries is consulted.
		foundEventIndexes := map[int]*int{}

		for watchEventIndex, expectedWatchEvent := range expectedWatchEvents {
			foundExpectedWatchEvent := false
		actualWatchEventsLoop:
			for actualWatchEventIndex, actualWatchEvent := range actualWatchEvents {
				// Skip actual events that were already matched.
				if foundEventIndexes[actualWatchEventIndex] != nil {
					continue actualWatchEventsLoop
				}
				if actualWatchEvent.Type == expectedWatchEvent.Type {
					foundExpectedWatchEvent = true
					foundEventIndexes[actualWatchEventIndex] = &watchEventIndex
					break actualWatchEventsLoop
				}
			}
			if !foundExpectedWatchEvent {
				errs.Insert(fmt.Sprintf("Watch event %v not found", expectedWatchEvent.Type))
			}
			// Counted for every expected event, whether or not it was found;
			// missing events are tracked separately in errs.
			totalValidWatchEvents++
		}
		// Clean up after every attempt so a retry starts from a clean state.
		err = retryCleanup()
		ExpectNoError(err, "Error occurred when cleaning up resources")
		// Missing events with attempts left: report and run the scenario again.
		if errs.Len() > 0 && try < retries {
			fmt.Println("invariants violated:\n", strings.Join(errs.List(), "\n - "))
			continue retriesLoop
		}
		// Missing events on the last attempt: fail the test.
		if errs.Len() > 0 {
			Failf("Unexpected error(s): %v", strings.Join(errs.List(), "\n - "))
		}
		gomega.Expect(expectedWatchEvents).To(gomega.HaveLen(totalValidWatchEvents), "Error: there must be an equal amount of total valid watch events (%d) and expected watch events (%d)", totalValidWatchEvents, len(expectedWatchEvents))
		break retriesLoop
	}
}