k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/framework/util.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package framework

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/url"
	"os"
	"os/exec"
	"path"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/dynamic"
	clientset "k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
	watchtools "k8s.io/client-go/tools/watch"
	netutils "k8s.io/utils/net"
)

const (
	// TODO(justinsb): Avoid hardcoding this.
	awsMasterIP = "172.20.0.9"
)

// DEPRECATED constants. Use the timeouts in framework.Framework instead.
const (
	// PodListTimeout is how long to wait for the pod to be listable.
	PodListTimeout = time.Minute

	// PodStartTimeout is how long to wait for the pod to be started.
	PodStartTimeout = 5 * time.Minute

	// PodStartShortTimeout is the same as `PodStartTimeout`, but shorter.
	// Use it case by case when we are sure pod start will not be delayed
	// by slow docker pulls or something else.
	PodStartShortTimeout = 2 * time.Minute

	// PodDeleteTimeout is how long to wait for a pod to be deleted.
	PodDeleteTimeout = 5 * time.Minute

	// PodGetTimeout is how long to wait for a pod to be retrieved.
	PodGetTimeout = 2 * time.Minute

	// PodEventTimeout is how long we wait for a pod event to occur.
	PodEventTimeout = 2 * time.Minute

	// ServiceStartTimeout is how long to wait for a service endpoint to be resolvable.
	ServiceStartTimeout = 3 * time.Minute

	// Poll is how often to poll pods, nodes and claims.
	Poll = 2 * time.Second

	// PollShortTimeout is the short timeout value in polling.
	PollShortTimeout = 1 * time.Minute

	// ServiceAccountProvisionTimeout is how long to wait for a service account to be provisioned.
	// Service accounts are provisioned after namespace creation.
	// A service account is required to support pod creation in a namespace as part of admission control.
	ServiceAccountProvisionTimeout = 2 * time.Minute

	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	SingleCallTimeout = 5 * time.Minute

	// NodeReadyInitialTimeout is how long nodes have to be "ready" when a test begins. They should already
	// be "ready" before the test starts, so this is small.
	NodeReadyInitialTimeout = 20 * time.Second

	// PodReadyBeforeTimeout is how long pods have to be "ready" when a test begins.
	PodReadyBeforeTimeout = 5 * time.Minute

	// ClaimProvisionShortTimeout is the same as `ClaimProvisionTimeout` to wait for a claim to be dynamically provisioned, but shorter.
	// Use it case by case when we are sure this timeout is enough.
	ClaimProvisionShortTimeout = 1 * time.Minute

	// ClaimProvisionTimeout is how long claims have to become dynamically provisioned.
	ClaimProvisionTimeout = 5 * time.Minute

	// RestartNodeReadyAgainTimeout is how long a node is allowed to become "Ready" after it is restarted before
	// the test is considered failed.
	RestartNodeReadyAgainTimeout = 5 * time.Minute

	// RestartPodReadyAgainTimeout is how long a pod is allowed to become "running" and "ready" after a node
	// restart before the test is considered failed.
	RestartPodReadyAgainTimeout = 5 * time.Minute

	// SnapshotCreateTimeout is how long for a snapshot to create snapshotContent.
	SnapshotCreateTimeout = 5 * time.Minute

	// SnapshotDeleteTimeout is how long for a snapshot to delete snapshotContent.
	SnapshotDeleteTimeout = 5 * time.Minute
)

var (
	// ProvidersWithSSH are those providers where each node is accessible with SSH
	ProvidersWithSSH = []string{"gce", "gke", "aws", "local", "azure"}
)

// RunID is a unique identifier of the e2e run.
// Beware that this ID is not the same for all tests in the e2e run, because each Ginkgo node creates it separately.
var RunID = uuid.NewUUID()

// CreateTestingNSFn is a func that is responsible for creating the namespace used for executing e2e tests.
type CreateTestingNSFn func(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error)

// APIAddress returns the address of an instance.
func APIAddress() string {
	instanceURL, err := url.Parse(TestContext.Host)
	ExpectNoError(err)
	return instanceURL.Hostname()
}

// ProviderIs returns true if the provider is included in the providers. Otherwise false.
func ProviderIs(providers ...string) bool {
	for _, provider := range providers {
		if strings.EqualFold(provider, TestContext.Provider) {
			return true
		}
	}
	return false
}

// MasterOSDistroIs returns true if the master OS distro is included in the supportedMasterOsDistros. Otherwise false.
func MasterOSDistroIs(supportedMasterOsDistros ...string) bool {
	for _, distro := range supportedMasterOsDistros {
		if strings.EqualFold(distro, TestContext.MasterOSDistro) {
			return true
		}
	}
	return false
}

// NodeOSDistroIs returns true if the node OS distro is included in the supportedNodeOsDistros. Otherwise false.
func NodeOSDistroIs(supportedNodeOsDistros ...string) bool {
	for _, distro := range supportedNodeOsDistros {
		if strings.EqualFold(distro, TestContext.NodeOSDistro) {
			return true
		}
	}
	return false
}
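// Usage sketch (illustrative, not part of this file): the predicates above are
// typically used to gate provider- or distro-specific specs. The e2eskipper
// import alias and skip message are assumptions for illustration.
//
//	if !framework.ProviderIs("gce", "aws") {
//		e2eskipper.Skipf("unsupported provider %q", framework.TestContext.Provider)
//	}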
// NodeOSArchIs returns true if the node OS arch is included in the supportedNodeOsArchs. Otherwise false.
func NodeOSArchIs(supportedNodeOsArchs ...string) bool {
	for _, arch := range supportedNodeOsArchs {
		if strings.EqualFold(arch, TestContext.NodeOSArch) {
			return true
		}
	}
	return false
}

// DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
// Filter is by simple strings.Contains; first skip filter, then delete filter.
// Returns the list of deleted namespaces or an error.
func DeleteNamespaces(ctx context.Context, c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) {
	ginkgo.By("Deleting namespaces")
	nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
	ExpectNoError(err, "Failed to get namespace list")
	var deleted []string
	var wg sync.WaitGroup
OUTER:
	for _, item := range nsList.Items {
		for _, pattern := range skipFilter {
			if strings.Contains(item.Name, pattern) {
				continue OUTER
			}
		}
		if deleteFilter != nil {
			var shouldDelete bool
			for _, pattern := range deleteFilter {
				if strings.Contains(item.Name, pattern) {
					shouldDelete = true
					break
				}
			}
			if !shouldDelete {
				continue OUTER
			}
		}
		wg.Add(1)
		deleted = append(deleted, item.Name)
		go func(nsName string) {
			defer wg.Done()
			defer ginkgo.GinkgoRecover()
			gomega.Expect(c.CoreV1().Namespaces().Delete(ctx, nsName, metav1.DeleteOptions{})).To(gomega.Succeed())
			Logf("namespace : %v api call to delete is complete ", nsName)
		}(item.Name)
	}
	wg.Wait()
	return deleted, nil
}
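// Usage sketch (illustrative, not part of this file): deletion is asynchronous,
// so callers typically pair DeleteNamespaces with WaitForNamespacesDeleted
// (defined below). The filter values are assumptions for illustration.
//
//	deleted, err := framework.DeleteNamespaces(ctx, c, []string{"e2e-tests-"} /* deleteFilter */, nil /* skipFilter */)
//	framework.ExpectNoError(err)
//	framework.ExpectNoError(framework.WaitForNamespacesDeleted(ctx, c, deleted, 5*time.Minute))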
// WaitForNamespacesDeleted waits for the namespaces to be deleted.
func WaitForNamespacesDeleted(ctx context.Context, c clientset.Interface, namespaces []string, timeout time.Duration) error {
	ginkgo.By(fmt.Sprintf("Waiting for namespaces %+v to vanish", namespaces))
	nsMap := map[string]bool{}
	for _, ns := range namespaces {
		nsMap[ns] = true
	}
	// Now poll until all namespaces have been eradicated.
	return wait.PollWithContext(ctx, 2*time.Second, timeout,
		func(ctx context.Context) (bool, error) {
			nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
			if err != nil {
				return false, err
			}
			for _, item := range nsList.Items {
				if _, ok := nsMap[item.Name]; ok {
					return false, nil
				}
			}
			return true, nil
		})
}

func waitForConfigMapInNamespace(ctx context.Context, c clientset.Interface, ns, name string, timeout time.Duration) error {
	fieldSelector := fields.OneTermEqualSelector("metadata.name", name).String()
	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout)
	defer cancel()
	lw := &cache.ListWatch{
		ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ConfigMaps(ns).List(ctx, options)
		},
		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ConfigMaps(ns).Watch(ctx, options)
		},
	}
	_, err := watchtools.UntilWithSync(ctx, lw, &v1.ConfigMap{}, nil, func(event watch.Event) (bool, error) {
		switch event.Type {
		case watch.Deleted:
			return false, apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, name)
		case watch.Added, watch.Modified:
			return true, nil
		}
		return false, nil
	})
	return err
}

func waitForServiceAccountInNamespace(ctx context.Context, c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error {
	fieldSelector := fields.OneTermEqualSelector("metadata.name", serviceAccountName).String()
	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout)
	defer cancel()
	lw := &cache.ListWatch{
		ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ServiceAccounts(ns).List(ctx, options)
		},
		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
			options.FieldSelector = fieldSelector
			return c.CoreV1().ServiceAccounts(ns).Watch(ctx, options)
		},
	}
	_, err := watchtools.UntilWithSync(ctx, lw, &v1.ServiceAccount{}, nil, func(event watch.Event) (bool, error) {
		switch event.Type {
		case watch.Deleted:
			return false, apierrors.NewNotFound(schema.GroupResource{Resource: "serviceaccounts"}, serviceAccountName)
		case watch.Added, watch.Modified:
			return true, nil
		}
		return false, nil
	})
	if err != nil {
		return fmt.Errorf("wait for service account %q in namespace %q: %w", serviceAccountName, ns, err)
	}
	return nil
}

// WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned.
// The default service account is what is associated with pods when they do not specify a service account;
// as a result, pods cannot be provisioned in a namespace until the service account is provisioned.
func WaitForDefaultServiceAccountInNamespace(ctx context.Context, c clientset.Interface, namespace string) error {
	return waitForServiceAccountInNamespace(ctx, c, namespace, defaultServiceAccountName, ServiceAccountProvisionTimeout)
}
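// Usage sketch (illustrative, not part of this file): a test that creates its
// own namespace can block until admission control is ready to admit pods. The
// namespace name is an assumption for illustration.
//
//	framework.ExpectNoError(framework.WaitForDefaultServiceAccountInNamespace(ctx, c, "my-e2e-ns"))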
// WaitForKubeRootCAInNamespace waits for the configmap kube-root-ca.crt containing the service account
// CA trust bundle to be provisioned in the specified namespace so that pods do not have to retry mounting
// the config map (which creates noise that hides other issues in the Kubelet).
func WaitForKubeRootCAInNamespace(ctx context.Context, c clientset.Interface, namespace string) error {
	return waitForConfigMapInNamespace(ctx, c, namespace, "kube-root-ca.crt", ServiceAccountProvisionTimeout)
}

// CreateTestingNS should be used by every test. Note that we append a common prefix to the provided test name.
// Please see NewFramework instead of using this directly.
func CreateTestingNS(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) {
	if labels == nil {
		labels = map[string]string{}
	}
	labels["e2e-run"] = string(RunID)

	// We don't use the ObjectMeta.GenerateName feature, because in case of
	// an API call failure we wouldn't know whether the namespace was created
	// and what its name is.
	name := fmt.Sprintf("%v-%v", baseName, RandomSuffix())

	namespaceObj := &v1.Namespace{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: "",
			Labels:    labels,
		},
		Status: v1.NamespaceStatus{},
	}
	// Be robust about making the namespace creation call.
	var got *v1.Namespace
	if err := wait.PollUntilContextTimeout(ctx, Poll, 30*time.Second, true, func(ctx context.Context) (bool, error) {
		var err error
		got, err = c.CoreV1().Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{})
		if err != nil {
			if apierrors.IsAlreadyExists(err) {
				// regenerate on conflict
				Logf("Namespace name %q was already taken, generate a new name and retry", namespaceObj.Name)
				namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, RandomSuffix())
			} else {
				Logf("Unexpected error while creating namespace: %v", err)
			}
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, err
	}

	if TestContext.VerifyServiceAccount {
		if err := WaitForDefaultServiceAccountInNamespace(ctx, c, got.Name); err != nil {
			// Even if we fail to create a serviceAccount in the namespace,
			// we have successfully created the namespace itself,
			// so return the created namespace.
			return got, err
		}
	}
	return got, nil
}
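// Usage sketch (illustrative, not part of this file): most tests get a
// namespace via NewFramework, but direct use looks roughly like this. The base
// name and label are assumptions for illustration.
//
//	ns, err := framework.CreateTestingNS(ctx, "density", c, map[string]string{"suite": "scalability"})
//	framework.ExpectNoError(err)
//	defer func() {
//		_ = c.CoreV1().Namespaces().Delete(ctx, ns.Name, metav1.DeleteOptions{})
//	}()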
// CheckTestingNSDeletedExcept checks whether all e2e-based existing namespaces are in the Terminating state
// and waits until they are finally deleted. It ignores the namespace given by skip.
func CheckTestingNSDeletedExcept(ctx context.Context, c clientset.Interface, skip string) error {
	// TODO: Since we don't have support for bulk resource deletion in the API,
	// while deleting a namespace we are deleting all objects from that namespace
	// one by one (one deletion == one API call). This basically exposes us to
	// throttling - currently controller-manager has a limit of max 20 QPS.
	// Once #10217 is implemented and used in namespace-controller, deleting all
	// objects from a given namespace should be much faster and we will be able
	// to lower this timeout.
	// However, now the Density test is producing ~26000 events and the Load capacity test
	// is producing ~35000 events, thus assuming there are no other requests it will
	// take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
	// minutes to avoid any timeouts here.
	timeout := 60 * time.Minute

	Logf("Waiting for terminating namespaces to be deleted...")
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
		namespaces, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
		if err != nil {
			Logf("Listing namespaces failed: %v", err)
			continue
		}
		terminating := 0
		for _, ns := range namespaces.Items {
			if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
				if ns.Status.Phase == v1.NamespaceActive {
					return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
				}
				terminating++
			}
		}
		if terminating == 0 {
			return nil
		}
	}
	return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
}

// WaitForServiceEndpointsNum waits until the number of endpoints that implement the service equals expectNum.
// Some components use EndpointSlices, others Endpoints, so we must verify that both objects meet the requirement.
func WaitForServiceEndpointsNum(ctx context.Context, c clientset.Interface, namespace, serviceName string, expectNum int, interval, timeout time.Duration) error {
	return wait.PollWithContext(ctx, interval, timeout, func(ctx context.Context) (bool, error) {
		Logf("Waiting for amount of service:%s endpoints to be %d", serviceName, expectNum)
		endpoint, err := c.CoreV1().Endpoints(namespace).Get(ctx, serviceName, metav1.GetOptions{})
		if err != nil {
			Logf("Unexpected error trying to get Endpoints for %s : %v", serviceName, err)
			return false, nil
		}

		if countEndpointsNum(endpoint) != expectNum {
			Logf("Unexpected number of Endpoints, got %d, expected %d", countEndpointsNum(endpoint), expectNum)
			return false, nil
		}

		// Endpoints are single family but EndpointSlices can have dual stack addresses,
		// so we verify the number of addresses that match the same family on both.
		addressType := discoveryv1.AddressTypeIPv4
		if isIPv6Endpoint(endpoint) {
			addressType = discoveryv1.AddressTypeIPv6
		}

		esList, err := c.DiscoveryV1().EndpointSlices(namespace).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discoveryv1.LabelServiceName, serviceName)})
		if err != nil {
			Logf("Unexpected error trying to get EndpointSlices for %s : %v", serviceName, err)
			return false, nil
		}

		if len(esList.Items) == 0 {
			Logf("Waiting for at least 1 EndpointSlice to exist")
			return false, nil
		}

		if countEndpointsSlicesNum(esList, addressType) != expectNum {
			Logf("Unexpected number of Endpoints on Slices, got %d, expected %d", countEndpointsSlicesNum(esList, addressType), expectNum)
			return false, nil
		}
		return true, nil
	})
}
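// Usage sketch (illustrative, not part of this file): wait until a service is
// backed by exactly three addresses in both the Endpoints object and the
// matching EndpointSlices. The names and durations are assumptions for
// illustration.
//
//	err := framework.WaitForServiceEndpointsNum(ctx, c, ns.Name, "my-svc", 3, 2*time.Second, 2*time.Minute)
//	framework.ExpectNoError(err)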
func countEndpointsNum(e *v1.Endpoints) int {
	num := 0
	for _, sub := range e.Subsets {
		num += len(sub.Addresses)
	}
	return num
}

// isIPv6Endpoint returns true if the Endpoint uses IPv6 addresses
func isIPv6Endpoint(e *v1.Endpoints) bool {
	for _, sub := range e.Subsets {
		for _, addr := range sub.Addresses {
			if len(addr.IP) == 0 {
				continue
			}
			// Endpoints are single family, so it is enough to check only one address
			return netutils.IsIPv6String(addr.IP)
		}
	}
	// default to IPv4 for an Endpoint without IP addresses
	return false
}

func countEndpointsSlicesNum(epList *discoveryv1.EndpointSliceList, addressType discoveryv1.AddressType) int {
	// EndpointSlices can contain the same address on multiple Slices
	addresses := sets.Set[string]{}
	for _, epSlice := range epList.Items {
		if epSlice.AddressType != addressType {
			continue
		}
		for _, ep := range epSlice.Endpoints {
			if len(ep.Addresses) > 0 {
				addresses.Insert(ep.Addresses[0])
			}
		}
	}
	return addresses.Len()
}

// restclientConfig returns a config that holds the information needed to build a connection to Kubernetes clusters.
func restclientConfig(kubeContext string) (*clientcmdapi.Config, error) {
	Logf(">>> kubeConfig: %s", TestContext.KubeConfig)
	if TestContext.KubeConfig == "" {
		return nil, fmt.Errorf("KubeConfig must be specified to load client config")
	}
	c, err := clientcmd.LoadFromFile(TestContext.KubeConfig)
	if err != nil {
		return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error())
	}
	if kubeContext != "" {
		Logf(">>> kubeContext: %s", kubeContext)
		c.CurrentContext = kubeContext
	}
	return c, nil
}

// ClientConfigGetter is a func that returns a rest client config.
type ClientConfigGetter func() (*restclient.Config, error)

// LoadConfig returns a config for a rest client with the UserAgent set to include the current test name.
func LoadConfig() (config *restclient.Config, err error) {
	defer func() {
		if err == nil && config != nil {
			testDesc := ginkgo.CurrentSpecReport()
			if len(testDesc.ContainerHierarchyTexts) > 0 {
				testName := strings.Join(testDesc.ContainerHierarchyTexts, " ")
				if len(testDesc.LeafNodeText) > 0 {
					testName = testName + " " + testDesc.LeafNodeText
				}
				config.UserAgent = fmt.Sprintf("%s -- %s", restclient.DefaultKubernetesUserAgent(), testName)
			}
		}
	}()

	if TestContext.NodeE2E {
		// This is a node e2e test, apply the node e2e configuration
		return &restclient.Config{
			Host:        TestContext.Host,
			BearerToken: TestContext.BearerToken,
			TLSClientConfig: restclient.TLSClientConfig{
				Insecure: true,
			},
		}, nil
	}
	c, err := restclientConfig(TestContext.KubeContext)
	if err != nil {
		if TestContext.KubeConfig == "" {
			return restclient.InClusterConfig()
		}
		return nil, err
	}
	// In case Host is not set in TestContext, set it as the
	// CurrentContext Server for the k8s API client to connect to.
	if TestContext.Host == "" && c.Clusters != nil {
		currentContext, ok := c.Clusters[c.CurrentContext]
		if ok {
			TestContext.Host = currentContext.Server
		}
	}

	return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: TestContext.Host}}).ClientConfig()
}

// LoadClientset returns a clientset for connecting to kubernetes clusters.
func LoadClientset() (*clientset.Clientset, error) {
	config, err := LoadConfig()
	if err != nil {
		return nil, fmt.Errorf("error creating client: %v", err.Error())
	}
	return clientset.NewForConfig(config)
}
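// Usage sketch (illustrative, not part of this file): tests that need both a
// raw rest config and a typed clientset typically do the following.
//
//	config, err := framework.LoadConfig()
//	framework.ExpectNoError(err)
//	c, err := framework.LoadClientset()
//	framework.ExpectNoError(err)
//	_ = config // e.g. to build dynamic or discovery clients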
// RandomSuffix provides a random sequence to append to pods, services, and rcs.
func RandomSuffix() string {
	return strconv.Itoa(rand.Intn(10000))
}

// StartCmdAndStreamOutput returns stdout and stderr after starting the given cmd.
func StartCmdAndStreamOutput(cmd *exec.Cmd) (stdout, stderr io.ReadCloser, err error) {
	stdout, err = cmd.StdoutPipe()
	if err != nil {
		return
	}
	stderr, err = cmd.StderrPipe()
	if err != nil {
		return
	}
	Logf("Asynchronously running '%s %s'", cmd.Path, strings.Join(cmd.Args, " "))
	err = cmd.Start()
	return
}

// TryKill is a rough equivalent of ctrl+c for cleaning up processes. Intended to be run in defer.
func TryKill(cmd *exec.Cmd) {
	if err := cmd.Process.Kill(); err != nil {
		Logf("ERROR failed to kill command %v! The process may leak", cmd)
	}
}
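// Usage sketch (illustrative, not part of this file): stream output from a
// long-running helper process and make sure it is killed on exit. The command
// is an assumption for illustration.
//
//	cmd := exec.Command("kubectl", "proxy")
//	stdout, _, err := framework.StartCmdAndStreamOutput(cmd)
//	framework.ExpectNoError(err)
//	defer framework.TryKill(cmd)
//	defer stdout.Close()
//	// ... read from stdout ...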
// EnsureLoadBalancerResourcesDeleted ensures that cloud load balancer resources that were created
// are actually cleaned up. Currently only implemented for GCE/GKE.
func EnsureLoadBalancerResourcesDeleted(ctx context.Context, ip, portRange string) error {
	return TestContext.CloudConfig.Provider.EnsureLoadBalancerResourcesDeleted(ctx, ip, portRange)
}

// CoreDump SSHs to the master and all nodes and dumps their logs into dir.
// It shells out to cluster/log-dump/log-dump.sh to accomplish this.
func CoreDump(dir string) {
	if TestContext.DisableLogDump {
		Logf("Skipping dumping logs from cluster")
		return
	}
	var cmd *exec.Cmd
	if TestContext.LogexporterGCSPath != "" {
		Logf("Dumping logs from nodes to GCS directly at path: %s", TestContext.LogexporterGCSPath)
		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir, TestContext.LogexporterGCSPath)
	} else {
		Logf("Dumping logs locally to: %s", dir)
		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir)
	}
	env := os.Environ()
	env = append(env, fmt.Sprintf("LOG_DUMP_SYSTEMD_SERVICES=%s", parseSystemdServices(TestContext.SystemdServices)))
	env = append(env, fmt.Sprintf("LOG_DUMP_SYSTEMD_JOURNAL=%v", TestContext.DumpSystemdJournal))
	cmd.Env = env

	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		Logf("Error running cluster/log-dump/log-dump.sh: %v", err)
	}
}

// parseSystemdServices converts the services separator from comma to space.
func parseSystemdServices(services string) string {
	return strings.TrimSpace(strings.Replace(services, ",", " ", -1))
}

// RunCmd runs cmd using args and returns its stdout and stderr. It also outputs
// cmd's stdout and stderr to their respective OS streams.
func RunCmd(command string, args ...string) (string, string, error) {
	return RunCmdEnv(nil, command, args...)
}

// RunCmdEnv runs cmd with the provided environment and args and
// returns its stdout and stderr. It also outputs cmd's stdout and
// stderr to their respective OS streams.
func RunCmdEnv(env []string, command string, args ...string) (string, string, error) {
	Logf("Running %s %v", command, args)
	var bout, berr bytes.Buffer
	cmd := exec.Command(command, args...)
	// We also output to the OS stdout/stderr to aid in debugging in case cmd
	// hangs and never returns before the test gets killed.
	//
	// This creates some ugly output because gcloud doesn't always provide
	// newlines.
	cmd.Stdout = io.MultiWriter(os.Stdout, &bout)
	cmd.Stderr = io.MultiWriter(os.Stderr, &berr)
	cmd.Env = env
	err := cmd.Run()
	stdout, stderr := bout.String(), berr.String()
	if err != nil {
		return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q",
			command, args, err, stdout, stderr)
	}
	return stdout, stderr, nil
}
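// Usage sketch (illustrative, not part of this file): run a one-shot command
// and capture its output while it is also mirrored to the test's OS streams.
// The command and arguments are assumptions for illustration.
//
//	stdout, stderr, err := framework.RunCmd("kubectl", "version", "--output=yaml")
//	framework.ExpectNoError(err, "stderr: %q", stderr)
//	framework.Logf("kubectl version:\n%s", stdout)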
// getControlPlaneAddresses returns the externalIP, internalIP and hostname fields of control plane nodes.
// If any of these is unavailable, empty slices are returned.
func getControlPlaneAddresses(ctx context.Context, c clientset.Interface) ([]string, []string, []string) {
	var externalIPs, internalIPs, hostnames []string

	// Populate the internal IPs.
	eps, err := c.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{})
	if err != nil {
		Failf("Failed to get kubernetes endpoints: %v", err)
	}
	for _, subset := range eps.Subsets {
		for _, address := range subset.Addresses {
			if address.IP != "" {
				internalIPs = append(internalIPs, address.IP)
			}
		}
	}

	// Populate the external IP/hostname.
	hostURL, err := url.Parse(TestContext.Host)
	if err != nil {
		Failf("Failed to parse hostname: %v", err)
	}
	if netutils.ParseIPSloppy(hostURL.Host) != nil {
		externalIPs = append(externalIPs, hostURL.Host)
	} else {
		hostnames = append(hostnames, hostURL.Host)
	}

	return externalIPs, internalIPs, hostnames
}

// GetControlPlaneAddresses returns all IP addresses on which the kubelet can reach the control plane.
// It may return internal and external IPs, even if we expect only e.g. internal IPs
// to be used (issue #56787), so that we can be sure to block the control plane
// fully during tests.
func GetControlPlaneAddresses(ctx context.Context, c clientset.Interface) []string {
	externalIPs, internalIPs, _ := getControlPlaneAddresses(ctx, c)

	ips := sets.NewString()
	switch TestContext.Provider {
	case "gce", "gke":
		for _, ip := range externalIPs {
			ips.Insert(ip)
		}
		for _, ip := range internalIPs {
			ips.Insert(ip)
		}
	case "aws":
		ips.Insert(awsMasterIP)
	default:
		Failf("This test is not supported for provider %s and should be disabled", TestContext.Provider)
	}
	return ips.List()
}

// PrettyPrintJSON converts metrics to indented JSON format.
func PrettyPrintJSON(metrics interface{}) string {
	output := &bytes.Buffer{}
	if err := json.NewEncoder(output).Encode(metrics); err != nil {
		Logf("Error building encoder: %v", err)
		return ""
	}
	formatted := &bytes.Buffer{}
	if err := json.Indent(formatted, output.Bytes(), "", "  "); err != nil {
		Logf("Error indenting: %v", err)
		return ""
	}
	return formatted.String()
}
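// Usage sketch (illustrative, not part of this file): pretty-print an arbitrary
// metrics-like value when logging. The value is an assumption for illustration.
//
//	summary := map[string]int{"pods": 30, "nodes": 3}
//	framework.Logf("test summary:\n%s", framework.PrettyPrintJSON(summary))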
// WatchEventSequenceVerifier manages a watch for a given resource, ensures that
// events take place in a given order, and retries the test on failure.
//
//	ctx                 cancellation signal across API boundaries, e.g. context from Ginkgo
//	dc                  sets up a client to the API
//	resourceType        specifies the type of resource
//	namespace           selects a namespace
//	resourceName        the name of the given resource
//	listOptions         options used to find the resource, recommended to use listOptions.labelSelector
//	expectedWatchEvents array of events which are expected to occur
//	scenario            the test itself
//	retryCleanup        a function to run which ensures that there are no dangling resources upon test failure
//
// This tooling relies on the test to return the events as they occur. The entire
// scenario must be run to ensure that the desired watch events arrive in order
// (allowing for interweaving of watch events).
//
// If an expected watch event is missing, we elect to clean up and run the entire
// scenario again.
//
// We try the scenario three times to allow the sequencing to fail a couple of times.
func WatchEventSequenceVerifier(ctx context.Context, dc dynamic.Interface, resourceType schema.GroupVersionResource, namespace string, resourceName string, listOptions metav1.ListOptions, expectedWatchEvents []watch.Event, scenario func(*watchtools.RetryWatcher) []watch.Event, retryCleanup func() error) {
	listWatcher := &cache.ListWatch{
		WatchFunc: func(listOptions metav1.ListOptions) (watch.Interface, error) {
			return dc.Resource(resourceType).Namespace(namespace).Watch(ctx, listOptions)
		},
	}

	retries := 3
retriesLoop:
	for try := 1; try <= retries; try++ {
		initResource, err := dc.Resource(resourceType).Namespace(namespace).List(ctx, listOptions)
		ExpectNoError(err, "Failed to fetch initial resource")

		resourceWatch, err := watchtools.NewRetryWatcher(initResource.GetResourceVersion(), listWatcher)
		ExpectNoError(err, "Failed to create a resource watch of %v in namespace %v", resourceType.Resource, namespace)

		// NOTE the test may need access to the events to see what's going on, such as a change in status
		actualWatchEvents := scenario(resourceWatch)
		errs := sets.NewString()
		gomega.Expect(len(expectedWatchEvents)).To(gomega.BeNumerically("<=", len(actualWatchEvents)), "Did not get enough watch events")

		totalValidWatchEvents := 0
		foundEventIndexes := map[int]*int{}

		for watchEventIndex, expectedWatchEvent := range expectedWatchEvents {
			foundExpectedWatchEvent := false
		actualWatchEventsLoop:
			for actualWatchEventIndex, actualWatchEvent := range actualWatchEvents {
				if foundEventIndexes[actualWatchEventIndex] != nil {
					continue actualWatchEventsLoop
				}
				if actualWatchEvent.Type == expectedWatchEvent.Type {
					foundExpectedWatchEvent = true
					foundEventIndexes[actualWatchEventIndex] = &watchEventIndex
					break actualWatchEventsLoop
				}
			}
			if !foundExpectedWatchEvent {
				errs.Insert(fmt.Sprintf("Watch event %v not found", expectedWatchEvent.Type))
			}
			totalValidWatchEvents++
		}
		err = retryCleanup()
		ExpectNoError(err, "Error occurred when cleaning up resources")
		if errs.Len() > 0 && try < retries {
			fmt.Println("invariants violated:\n", strings.Join(errs.List(), "\n - "))
			continue retriesLoop
		}
		if errs.Len() > 0 {
			Failf("Unexpected error(s): %v", strings.Join(errs.List(), "\n - "))
		}
		gomega.Expect(expectedWatchEvents).To(gomega.HaveLen(totalValidWatchEvents), "Error: there must be an equal amount of total valid watch events (%d) and expected watch events (%d)", totalValidWatchEvents, len(expectedWatchEvents))
		break retriesLoop
	}
}
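// Usage sketch (illustrative, not part of this file): verify that a pod in the
// test namespace is added, modified, and deleted in that order. The scenario
// body, label selector, and cleanup are assumptions for illustration.
//
//	podGVR := schema.GroupVersionResource{Version: "v1", Resource: "pods"}
//	expected := []watch.Event{{Type: watch.Added}, {Type: watch.Modified}, {Type: watch.Deleted}}
//	framework.WatchEventSequenceVerifier(ctx, dynamicClient, podGVR, ns.Name, "test-pod",
//		metav1.ListOptions{LabelSelector: "watch-test=true"}, expected,
//		func(w *watchtools.RetryWatcher) []watch.Event {
//			// drive the create/update/delete here and collect events from w.ResultChan()
//			return collectedEvents
//		},
//		func() error { return nil /* delete any dangling test pods */ },
//	)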