k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/framework/util.go (about)

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package framework
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  	"fmt"
    24  	"io"
    25  	"math/rand"
    26  	"net/url"
    27  	"os"
    28  	"os/exec"
    29  	"path"
    30  	"strconv"
    31  	"strings"
    32  	"sync"
    33  	"time"
    34  
    35  	"github.com/onsi/ginkgo/v2"
    36  	"github.com/onsi/gomega"
    37  
    38  	v1 "k8s.io/api/core/v1"
    39  	discoveryv1 "k8s.io/api/discovery/v1"
    40  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    41  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    42  	"k8s.io/apimachinery/pkg/fields"
    43  	"k8s.io/apimachinery/pkg/runtime"
    44  	"k8s.io/apimachinery/pkg/runtime/schema"
    45  	"k8s.io/apimachinery/pkg/util/sets"
    46  	"k8s.io/apimachinery/pkg/util/uuid"
    47  	"k8s.io/apimachinery/pkg/util/wait"
    48  	"k8s.io/apimachinery/pkg/watch"
    49  	"k8s.io/client-go/dynamic"
    50  	clientset "k8s.io/client-go/kubernetes"
    51  	restclient "k8s.io/client-go/rest"
    52  	"k8s.io/client-go/tools/cache"
    53  	"k8s.io/client-go/tools/clientcmd"
    54  	clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
    55  	watchtools "k8s.io/client-go/tools/watch"
    56  	netutils "k8s.io/utils/net"
    57  )
    58  
const (
	// awsMasterIP is the hardcoded control plane IP that
	// GetControlPlaneAddresses reports when the provider is "aws".
	// TODO(justinsb): Avoid hardcoding this.
	awsMasterIP = "172.20.0.9"
)
    63  
// DEPRECATED constants. Use the timeouts in framework.Framework instead.
const (
	// PodListTimeout is how long to wait for the pod to be listable.
	PodListTimeout = time.Minute

	// PodStartTimeout is how long to wait for the pod to be started.
	PodStartTimeout = 5 * time.Minute

	// PodStartShortTimeout is the same as `PodStartTimeout`, but shorter.
	// Use it case by case, when we are sure that pod start will not be
	// delayed by slow image pulls or something else.
	PodStartShortTimeout = 2 * time.Minute

	// PodDeleteTimeout is how long to wait for a pod to be deleted.
	PodDeleteTimeout = 5 * time.Minute

	// PodGetTimeout is how long to wait for a pod to be retrieved.
	PodGetTimeout = 2 * time.Minute

	// PodEventTimeout is how long to wait for a pod event to occur.
	PodEventTimeout = 2 * time.Minute

	// ServiceStartTimeout is how long to wait for a service endpoint to be resolvable.
	ServiceStartTimeout = 3 * time.Minute

	// Poll is how often to poll pods, nodes and claims.
	Poll = 2 * time.Second

	// PollShortTimeout is the short timeout value in polling.
	PollShortTimeout = 1 * time.Minute

	// ServiceAccountProvisionTimeout is how long to wait for a service account to be provisioned.
	// Service accounts are provisioned after namespace creation, and a service account is
	// required to support pod creation in a namespace as part of admission control.
	ServiceAccountProvisionTimeout = 2 * time.Minute

	// SingleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
	// transient failures from failing tests.
	SingleCallTimeout = 5 * time.Minute

	// NodeReadyInitialTimeout is how long nodes have to be "ready" when a test begins. They should already
	// be "ready" before the test starts, so this is small.
	NodeReadyInitialTimeout = 20 * time.Second

	// PodReadyBeforeTimeout is how long pods have to be "ready" when a test begins.
	PodReadyBeforeTimeout = 5 * time.Minute

	// ClaimProvisionShortTimeout is the same as `ClaimProvisionTimeout`, but shorter.
	// Use it case by case, when we are sure this timeout is enough.
	ClaimProvisionShortTimeout = 1 * time.Minute

	// ClaimProvisionTimeout is how long claims have to become dynamically provisioned.
	ClaimProvisionTimeout = 5 * time.Minute

	// RestartNodeReadyAgainTimeout is how long a node is allowed to take to become "Ready" after it
	// is restarted before the test is considered failed.
	RestartNodeReadyAgainTimeout = 5 * time.Minute

	// RestartPodReadyAgainTimeout is how long a pod is allowed to take to become "running" and "ready"
	// after a node restart before the test is considered failed.
	RestartPodReadyAgainTimeout = 5 * time.Minute

	// SnapshotCreateTimeout is how long to wait for a snapshot to create its snapshotContent.
	SnapshotCreateTimeout = 5 * time.Minute

	// SnapshotDeleteTimeout is how long to wait for a snapshot to delete its snapshotContent.
	SnapshotDeleteTimeout = 5 * time.Minute
)
   132  
var (
	// ProvidersWithSSH lists the providers on which each node is accessible with SSH.
	ProvidersWithSSH = []string{"gce", "gke", "aws", "local", "azure"}
)
   137  
// RunID is a unique identifier of the e2e run, generated once when the package is initialized.
// Beware that this ID is not the same for all tests in the e2e run, because each Ginkgo node creates it separately.
var RunID = uuid.NewUUID()
   141  
// CreateTestingNSFn is the signature of a function that creates the namespace used for executing e2e tests.
// CreateTestingNS in this file has this signature.
type CreateTestingNSFn func(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error)
   144  
   145  // APIAddress returns a address of an instance.
   146  func APIAddress() string {
   147  	instanceURL, err := url.Parse(TestContext.Host)
   148  	ExpectNoError(err)
   149  	return instanceURL.Hostname()
   150  }
   151  
   152  // ProviderIs returns true if the provider is included is the providers. Otherwise false.
   153  func ProviderIs(providers ...string) bool {
   154  	for _, provider := range providers {
   155  		if strings.EqualFold(provider, TestContext.Provider) {
   156  			return true
   157  		}
   158  	}
   159  	return false
   160  }
   161  
   162  // MasterOSDistroIs returns true if the master OS distro is included in the supportedMasterOsDistros. Otherwise false.
   163  func MasterOSDistroIs(supportedMasterOsDistros ...string) bool {
   164  	for _, distro := range supportedMasterOsDistros {
   165  		if strings.EqualFold(distro, TestContext.MasterOSDistro) {
   166  			return true
   167  		}
   168  	}
   169  	return false
   170  }
   171  
   172  // NodeOSDistroIs returns true if the node OS distro is included in the supportedNodeOsDistros. Otherwise false.
   173  func NodeOSDistroIs(supportedNodeOsDistros ...string) bool {
   174  	for _, distro := range supportedNodeOsDistros {
   175  		if strings.EqualFold(distro, TestContext.NodeOSDistro) {
   176  			return true
   177  		}
   178  	}
   179  	return false
   180  }
   181  
   182  // NodeOSArchIs returns true if the node OS arch is included in the supportedNodeOsArchs. Otherwise false.
   183  func NodeOSArchIs(supportedNodeOsArchs ...string) bool {
   184  	for _, arch := range supportedNodeOsArchs {
   185  		if strings.EqualFold(arch, TestContext.NodeOSArch) {
   186  			return true
   187  		}
   188  	}
   189  	return false
   190  }
   191  
   192  // DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
   193  // Filter is by simple strings.Contains; first skip filter, then delete filter.
   194  // Returns the list of deleted namespaces or an error.
   195  func DeleteNamespaces(ctx context.Context, c clientset.Interface, deleteFilter, skipFilter []string) ([]string, error) {
   196  	ginkgo.By("Deleting namespaces")
   197  	nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
   198  	ExpectNoError(err, "Failed to get namespace list")
   199  	var deleted []string
   200  	var wg sync.WaitGroup
   201  OUTER:
   202  	for _, item := range nsList.Items {
   203  		for _, pattern := range skipFilter {
   204  			if strings.Contains(item.Name, pattern) {
   205  				continue OUTER
   206  			}
   207  		}
   208  		if deleteFilter != nil {
   209  			var shouldDelete bool
   210  			for _, pattern := range deleteFilter {
   211  				if strings.Contains(item.Name, pattern) {
   212  					shouldDelete = true
   213  					break
   214  				}
   215  			}
   216  			if !shouldDelete {
   217  				continue OUTER
   218  			}
   219  		}
   220  		wg.Add(1)
   221  		deleted = append(deleted, item.Name)
   222  		go func(nsName string) {
   223  			defer wg.Done()
   224  			defer ginkgo.GinkgoRecover()
   225  			gomega.Expect(c.CoreV1().Namespaces().Delete(ctx, nsName, metav1.DeleteOptions{})).To(gomega.Succeed())
   226  			Logf("namespace : %v api call to delete is complete ", nsName)
   227  		}(item.Name)
   228  	}
   229  	wg.Wait()
   230  	return deleted, nil
   231  }
   232  
   233  // WaitForNamespacesDeleted waits for the namespaces to be deleted.
   234  func WaitForNamespacesDeleted(ctx context.Context, c clientset.Interface, namespaces []string, timeout time.Duration) error {
   235  	ginkgo.By(fmt.Sprintf("Waiting for namespaces %+v to vanish", namespaces))
   236  	nsMap := map[string]bool{}
   237  	for _, ns := range namespaces {
   238  		nsMap[ns] = true
   239  	}
   240  	//Now POLL until all namespaces have been eradicated.
   241  	return wait.PollWithContext(ctx, 2*time.Second, timeout,
   242  		func(ctx context.Context) (bool, error) {
   243  			nsList, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
   244  			if err != nil {
   245  				return false, err
   246  			}
   247  			for _, item := range nsList.Items {
   248  				if _, ok := nsMap[item.Name]; ok {
   249  					return false, nil
   250  				}
   251  			}
   252  			return true, nil
   253  		})
   254  }
   255  
   256  func waitForConfigMapInNamespace(ctx context.Context, c clientset.Interface, ns, name string, timeout time.Duration) error {
   257  	fieldSelector := fields.OneTermEqualSelector("metadata.name", name).String()
   258  	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout)
   259  	defer cancel()
   260  	lw := &cache.ListWatch{
   261  		ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) {
   262  			options.FieldSelector = fieldSelector
   263  			return c.CoreV1().ConfigMaps(ns).List(ctx, options)
   264  		},
   265  		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
   266  			options.FieldSelector = fieldSelector
   267  			return c.CoreV1().ConfigMaps(ns).Watch(ctx, options)
   268  		},
   269  	}
   270  	_, err := watchtools.UntilWithSync(ctx, lw, &v1.ConfigMap{}, nil, func(event watch.Event) (bool, error) {
   271  		switch event.Type {
   272  		case watch.Deleted:
   273  			return false, apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, name)
   274  		case watch.Added, watch.Modified:
   275  			return true, nil
   276  		}
   277  		return false, nil
   278  	})
   279  	return err
   280  }
   281  
   282  func waitForServiceAccountInNamespace(ctx context.Context, c clientset.Interface, ns, serviceAccountName string, timeout time.Duration) error {
   283  	fieldSelector := fields.OneTermEqualSelector("metadata.name", serviceAccountName).String()
   284  	ctx, cancel := watchtools.ContextWithOptionalTimeout(ctx, timeout)
   285  	defer cancel()
   286  	lw := &cache.ListWatch{
   287  		ListFunc: func(options metav1.ListOptions) (object runtime.Object, e error) {
   288  			options.FieldSelector = fieldSelector
   289  			return c.CoreV1().ServiceAccounts(ns).List(ctx, options)
   290  		},
   291  		WatchFunc: func(options metav1.ListOptions) (i watch.Interface, e error) {
   292  			options.FieldSelector = fieldSelector
   293  			return c.CoreV1().ServiceAccounts(ns).Watch(ctx, options)
   294  		},
   295  	}
   296  	_, err := watchtools.UntilWithSync(ctx, lw, &v1.ServiceAccount{}, nil, func(event watch.Event) (bool, error) {
   297  		switch event.Type {
   298  		case watch.Deleted:
   299  			return false, apierrors.NewNotFound(schema.GroupResource{Resource: "serviceaccounts"}, serviceAccountName)
   300  		case watch.Added, watch.Modified:
   301  			return true, nil
   302  		}
   303  		return false, nil
   304  	})
   305  	if err != nil {
   306  		return fmt.Errorf("wait for service account %q in namespace %q: %w", serviceAccountName, ns, err)
   307  	}
   308  	return nil
   309  }
   310  
   311  // WaitForDefaultServiceAccountInNamespace waits for the default service account to be provisioned
   312  // the default service account is what is associated with pods when they do not specify a service account
   313  // as a result, pods are not able to be provisioned in a namespace until the service account is provisioned
   314  func WaitForDefaultServiceAccountInNamespace(ctx context.Context, c clientset.Interface, namespace string) error {
   315  	return waitForServiceAccountInNamespace(ctx, c, namespace, defaultServiceAccountName, ServiceAccountProvisionTimeout)
   316  }
   317  
   318  // WaitForKubeRootCAInNamespace waits for the configmap kube-root-ca.crt containing the service account
   319  // CA trust bundle to be provisioned in the specified namespace so that pods do not have to retry mounting
   320  // the config map (which creates noise that hides other issues in the Kubelet).
   321  func WaitForKubeRootCAInNamespace(ctx context.Context, c clientset.Interface, namespace string) error {
   322  	return waitForConfigMapInNamespace(ctx, c, namespace, "kube-root-ca.crt", ServiceAccountProvisionTimeout)
   323  }
   324  
   325  // CreateTestingNS should be used by every test, note that we append a common prefix to the provided test name.
   326  // Please see NewFramework instead of using this directly.
   327  func CreateTestingNS(ctx context.Context, baseName string, c clientset.Interface, labels map[string]string) (*v1.Namespace, error) {
   328  	if labels == nil {
   329  		labels = map[string]string{}
   330  	}
   331  	labels["e2e-run"] = string(RunID)
   332  
   333  	// We don't use ObjectMeta.GenerateName feature, as in case of API call
   334  	// failure we don't know whether the namespace was created and what is its
   335  	// name.
   336  	name := fmt.Sprintf("%v-%v", baseName, RandomSuffix())
   337  
   338  	namespaceObj := &v1.Namespace{
   339  		ObjectMeta: metav1.ObjectMeta{
   340  			Name:      name,
   341  			Namespace: "",
   342  			Labels:    labels,
   343  		},
   344  		Status: v1.NamespaceStatus{},
   345  	}
   346  	// Be robust about making the namespace creation call.
   347  	var got *v1.Namespace
   348  	if err := wait.PollUntilContextTimeout(ctx, Poll, 30*time.Second, true, func(ctx context.Context) (bool, error) {
   349  		var err error
   350  		got, err = c.CoreV1().Namespaces().Create(ctx, namespaceObj, metav1.CreateOptions{})
   351  		if err != nil {
   352  			if apierrors.IsAlreadyExists(err) {
   353  				// regenerate on conflict
   354  				Logf("Namespace name %q was already taken, generate a new name and retry", namespaceObj.Name)
   355  				namespaceObj.Name = fmt.Sprintf("%v-%v", baseName, RandomSuffix())
   356  			} else {
   357  				Logf("Unexpected error while creating namespace: %v", err)
   358  			}
   359  			return false, nil
   360  		}
   361  		return true, nil
   362  	}); err != nil {
   363  		return nil, err
   364  	}
   365  
   366  	if TestContext.VerifyServiceAccount {
   367  		if err := WaitForDefaultServiceAccountInNamespace(ctx, c, got.Name); err != nil {
   368  			// Even if we fail to create serviceAccount in the namespace,
   369  			// we have successfully create a namespace.
   370  			// So, return the created namespace.
   371  			return got, err
   372  		}
   373  	}
   374  	return got, nil
   375  }
   376  
   377  // CheckTestingNSDeletedExcept checks whether all e2e based existing namespaces are in the Terminating state
   378  // and waits until they are finally deleted. It ignores namespace skip.
   379  func CheckTestingNSDeletedExcept(ctx context.Context, c clientset.Interface, skip string) error {
   380  	// TODO: Since we don't have support for bulk resource deletion in the API,
   381  	// while deleting a namespace we are deleting all objects from that namespace
   382  	// one by one (one deletion == one API call). This basically exposes us to
   383  	// throttling - currently controller-manager has a limit of max 20 QPS.
   384  	// Once #10217 is implemented and used in namespace-controller, deleting all
   385  	// object from a given namespace should be much faster and we will be able
   386  	// to lower this timeout.
   387  	// However, now Density test is producing ~26000 events and Load capacity test
   388  	// is producing ~35000 events, thus assuming there are no other requests it will
   389  	// take ~30 minutes to fully delete the namespace. Thus I'm setting it to 60
   390  	// minutes to avoid any timeouts here.
   391  	timeout := 60 * time.Minute
   392  
   393  	Logf("Waiting for terminating namespaces to be deleted...")
   394  	for start := time.Now(); time.Since(start) < timeout; time.Sleep(15 * time.Second) {
   395  		namespaces, err := c.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
   396  		if err != nil {
   397  			Logf("Listing namespaces failed: %v", err)
   398  			continue
   399  		}
   400  		terminating := 0
   401  		for _, ns := range namespaces.Items {
   402  			if strings.HasPrefix(ns.ObjectMeta.Name, "e2e-tests-") && ns.ObjectMeta.Name != skip {
   403  				if ns.Status.Phase == v1.NamespaceActive {
   404  					return fmt.Errorf("Namespace %s is active", ns.ObjectMeta.Name)
   405  				}
   406  				terminating++
   407  			}
   408  		}
   409  		if terminating == 0 {
   410  			return nil
   411  		}
   412  	}
   413  	return fmt.Errorf("Waiting for terminating namespaces to be deleted timed out")
   414  }
   415  
   416  // WaitForServiceEndpointsNum waits until the amount of endpoints that implement service to expectNum.
   417  // Some components use EndpointSlices other Endpoints, we must verify that both objects meet the requirements.
   418  func WaitForServiceEndpointsNum(ctx context.Context, c clientset.Interface, namespace, serviceName string, expectNum int, interval, timeout time.Duration) error {
   419  	return wait.PollWithContext(ctx, interval, timeout, func(ctx context.Context) (bool, error) {
   420  		Logf("Waiting for amount of service:%s endpoints to be %d", serviceName, expectNum)
   421  		endpoint, err := c.CoreV1().Endpoints(namespace).Get(ctx, serviceName, metav1.GetOptions{})
   422  		if err != nil {
   423  			Logf("Unexpected error trying to get Endpoints for %s : %v", serviceName, err)
   424  			return false, nil
   425  		}
   426  
   427  		if countEndpointsNum(endpoint) != expectNum {
   428  			Logf("Unexpected number of Endpoints, got %d, expected %d", countEndpointsNum(endpoint), expectNum)
   429  			return false, nil
   430  		}
   431  
   432  		// Endpoints are single family but EndpointSlices can have dual stack addresses,
   433  		// so we verify the number of addresses that matches the same family on both.
   434  		addressType := discoveryv1.AddressTypeIPv4
   435  		if isIPv6Endpoint(endpoint) {
   436  			addressType = discoveryv1.AddressTypeIPv6
   437  		}
   438  
   439  		esList, err := c.DiscoveryV1().EndpointSlices(namespace).List(ctx, metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discoveryv1.LabelServiceName, serviceName)})
   440  		if err != nil {
   441  			Logf("Unexpected error trying to get EndpointSlices for %s : %v", serviceName, err)
   442  			return false, nil
   443  		}
   444  
   445  		if len(esList.Items) == 0 {
   446  			Logf("Waiting for at least 1 EndpointSlice to exist")
   447  			return false, nil
   448  		}
   449  
   450  		if countEndpointsSlicesNum(esList, addressType) != expectNum {
   451  			Logf("Unexpected number of Endpoints on Slices, got %d, expected %d", countEndpointsSlicesNum(esList, addressType), expectNum)
   452  			return false, nil
   453  		}
   454  		return true, nil
   455  	})
   456  }
   457  
   458  func countEndpointsNum(e *v1.Endpoints) int {
   459  	num := 0
   460  	for _, sub := range e.Subsets {
   461  		num += len(sub.Addresses)
   462  	}
   463  	return num
   464  }
   465  
   466  // isIPv6Endpoint returns true if the Endpoint uses IPv6 addresses
   467  func isIPv6Endpoint(e *v1.Endpoints) bool {
   468  	for _, sub := range e.Subsets {
   469  		for _, addr := range sub.Addresses {
   470  			if len(addr.IP) == 0 {
   471  				continue
   472  			}
   473  			// Endpoints are single family, so it is enough to check only one address
   474  			return netutils.IsIPv6String(addr.IP)
   475  		}
   476  	}
   477  	// default to IPv4 an Endpoint without IP addresses
   478  	return false
   479  }
   480  
   481  func countEndpointsSlicesNum(epList *discoveryv1.EndpointSliceList, addressType discoveryv1.AddressType) int {
   482  	// EndpointSlices can contain the same address on multiple Slices
   483  	addresses := sets.Set[string]{}
   484  	for _, epSlice := range epList.Items {
   485  		if epSlice.AddressType != addressType {
   486  			continue
   487  		}
   488  		for _, ep := range epSlice.Endpoints {
   489  			if len(ep.Addresses) > 0 {
   490  				addresses.Insert(ep.Addresses[0])
   491  			}
   492  		}
   493  	}
   494  	return addresses.Len()
   495  }
   496  
   497  // restclientConfig returns a config holds the information needed to build connection to kubernetes clusters.
   498  func restclientConfig(kubeContext string) (*clientcmdapi.Config, error) {
   499  	Logf(">>> kubeConfig: %s", TestContext.KubeConfig)
   500  	if TestContext.KubeConfig == "" {
   501  		return nil, fmt.Errorf("KubeConfig must be specified to load client config")
   502  	}
   503  	c, err := clientcmd.LoadFromFile(TestContext.KubeConfig)
   504  	if err != nil {
   505  		return nil, fmt.Errorf("error loading KubeConfig: %v", err.Error())
   506  	}
   507  	if kubeContext != "" {
   508  		Logf(">>> kubeContext: %s", kubeContext)
   509  		c.CurrentContext = kubeContext
   510  	}
   511  	return c, nil
   512  }
   513  
// ClientConfigGetter is the signature of a function that returns a rest client config.
// LoadConfig in this file has this signature.
type ClientConfigGetter func() (*restclient.Config, error)
   516  
   517  // LoadConfig returns a config for a rest client with the UserAgent set to include the current test name.
   518  func LoadConfig() (config *restclient.Config, err error) {
   519  	defer func() {
   520  		if err == nil && config != nil {
   521  			testDesc := ginkgo.CurrentSpecReport()
   522  			if len(testDesc.ContainerHierarchyTexts) > 0 {
   523  				testName := strings.Join(testDesc.ContainerHierarchyTexts, " ")
   524  				if len(testDesc.LeafNodeText) > 0 {
   525  					testName = testName + " " + testDesc.LeafNodeText
   526  				}
   527  				config.UserAgent = fmt.Sprintf("%s -- %s", restclient.DefaultKubernetesUserAgent(), testName)
   528  			}
   529  		}
   530  	}()
   531  
   532  	if TestContext.NodeE2E {
   533  		// This is a node e2e test, apply the node e2e configuration
   534  		return &restclient.Config{
   535  			Host:        TestContext.Host,
   536  			BearerToken: TestContext.BearerToken,
   537  			TLSClientConfig: restclient.TLSClientConfig{
   538  				Insecure: true,
   539  			},
   540  		}, nil
   541  	}
   542  	c, err := restclientConfig(TestContext.KubeContext)
   543  	if err != nil {
   544  		if TestContext.KubeConfig == "" {
   545  			return restclient.InClusterConfig()
   546  		}
   547  		return nil, err
   548  	}
   549  	// In case Host is not set in TestContext, sets it as
   550  	// CurrentContext Server for k8s API client to connect to.
   551  	if TestContext.Host == "" && c.Clusters != nil {
   552  		currentContext, ok := c.Clusters[c.CurrentContext]
   553  		if ok {
   554  			TestContext.Host = currentContext.Server
   555  		}
   556  	}
   557  
   558  	return clientcmd.NewDefaultClientConfig(*c, &clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: TestContext.Host}}).ClientConfig()
   559  }
   560  
   561  // LoadClientset returns clientset for connecting to kubernetes clusters.
   562  func LoadClientset() (*clientset.Clientset, error) {
   563  	config, err := LoadConfig()
   564  	if err != nil {
   565  		return nil, fmt.Errorf("error creating client: %v", err.Error())
   566  	}
   567  	return clientset.NewForConfig(config)
   568  }
   569  
   570  // RandomSuffix provides a random sequence to append to pods,services,rcs.
   571  func RandomSuffix() string {
   572  	return strconv.Itoa(rand.Intn(10000))
   573  }
   574  
   575  // StartCmdAndStreamOutput returns stdout and stderr after starting the given cmd.
   576  func StartCmdAndStreamOutput(cmd *exec.Cmd) (stdout, stderr io.ReadCloser, err error) {
   577  	stdout, err = cmd.StdoutPipe()
   578  	if err != nil {
   579  		return
   580  	}
   581  	stderr, err = cmd.StderrPipe()
   582  	if err != nil {
   583  		return
   584  	}
   585  	Logf("Asynchronously running '%s %s'", cmd.Path, strings.Join(cmd.Args, " "))
   586  	err = cmd.Start()
   587  	return
   588  }
   589  
   590  // TryKill is rough equivalent of ctrl+c for cleaning up processes. Intended to be run in defer.
   591  func TryKill(cmd *exec.Cmd) {
   592  	if err := cmd.Process.Kill(); err != nil {
   593  		Logf("ERROR failed to kill command %v! The process may leak", cmd)
   594  	}
   595  }
   596  
   597  // EnsureLoadBalancerResourcesDeleted ensures that cloud load balancer resources that were created
   598  // are actually cleaned up.  Currently only implemented for GCE/GKE.
   599  func EnsureLoadBalancerResourcesDeleted(ctx context.Context, ip, portRange string) error {
   600  	return TestContext.CloudConfig.Provider.EnsureLoadBalancerResourcesDeleted(ctx, ip, portRange)
   601  }
   602  
   603  // CoreDump SSHs to the master and all nodes and dumps their logs into dir.
   604  // It shells out to cluster/log-dump/log-dump.sh to accomplish this.
   605  func CoreDump(dir string) {
   606  	if TestContext.DisableLogDump {
   607  		Logf("Skipping dumping logs from cluster")
   608  		return
   609  	}
   610  	var cmd *exec.Cmd
   611  	if TestContext.LogexporterGCSPath != "" {
   612  		Logf("Dumping logs from nodes to GCS directly at path: %s", TestContext.LogexporterGCSPath)
   613  		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir, TestContext.LogexporterGCSPath)
   614  	} else {
   615  		Logf("Dumping logs locally to: %s", dir)
   616  		cmd = exec.Command(path.Join(TestContext.RepoRoot, "cluster", "log-dump", "log-dump.sh"), dir)
   617  	}
   618  	env := os.Environ()
   619  	env = append(env, fmt.Sprintf("LOG_DUMP_SYSTEMD_SERVICES=%s", parseSystemdServices(TestContext.SystemdServices)))
   620  	env = append(env, fmt.Sprintf("LOG_DUMP_SYSTEMD_JOURNAL=%v", TestContext.DumpSystemdJournal))
   621  	cmd.Env = env
   622  
   623  	cmd.Stdout = os.Stdout
   624  	cmd.Stderr = os.Stderr
   625  	if err := cmd.Run(); err != nil {
   626  		Logf("Error running cluster/log-dump/log-dump.sh: %v", err)
   627  	}
   628  }
   629  
   630  // parseSystemdServices converts services separator from comma to space.
   631  func parseSystemdServices(services string) string {
   632  	return strings.TrimSpace(strings.Replace(services, ",", " ", -1))
   633  }
   634  
   635  // RunCmd runs cmd using args and returns its stdout and stderr. It also outputs
   636  // cmd's stdout and stderr to their respective OS streams.
   637  func RunCmd(command string, args ...string) (string, string, error) {
   638  	return RunCmdEnv(nil, command, args...)
   639  }
   640  
   641  // RunCmdEnv runs cmd with the provided environment and args and
   642  // returns its stdout and stderr. It also outputs cmd's stdout and
   643  // stderr to their respective OS streams.
   644  func RunCmdEnv(env []string, command string, args ...string) (string, string, error) {
   645  	Logf("Running %s %v", command, args)
   646  	var bout, berr bytes.Buffer
   647  	cmd := exec.Command(command, args...)
   648  	// We also output to the OS stdout/stderr to aid in debugging in case cmd
   649  	// hangs and never returns before the test gets killed.
   650  	//
   651  	// This creates some ugly output because gcloud doesn't always provide
   652  	// newlines.
   653  	cmd.Stdout = io.MultiWriter(os.Stdout, &bout)
   654  	cmd.Stderr = io.MultiWriter(os.Stderr, &berr)
   655  	cmd.Env = env
   656  	err := cmd.Run()
   657  	stdout, stderr := bout.String(), berr.String()
   658  	if err != nil {
   659  		return "", "", fmt.Errorf("error running %s %v; got error %v, stdout %q, stderr %q",
   660  			command, args, err, stdout, stderr)
   661  	}
   662  	return stdout, stderr, nil
   663  }
   664  
   665  // getControlPlaneAddresses returns the externalIP, internalIP and hostname fields of control plane nodes.
   666  // If any of these is unavailable, empty slices are returned.
   667  func getControlPlaneAddresses(ctx context.Context, c clientset.Interface) ([]string, []string, []string) {
   668  	var externalIPs, internalIPs, hostnames []string
   669  
   670  	// Populate the internal IPs.
   671  	eps, err := c.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{})
   672  	if err != nil {
   673  		Failf("Failed to get kubernetes endpoints: %v", err)
   674  	}
   675  	for _, subset := range eps.Subsets {
   676  		for _, address := range subset.Addresses {
   677  			if address.IP != "" {
   678  				internalIPs = append(internalIPs, address.IP)
   679  			}
   680  		}
   681  	}
   682  
   683  	// Populate the external IP/hostname.
   684  	hostURL, err := url.Parse(TestContext.Host)
   685  	if err != nil {
   686  		Failf("Failed to parse hostname: %v", err)
   687  	}
   688  	if netutils.ParseIPSloppy(hostURL.Host) != nil {
   689  		externalIPs = append(externalIPs, hostURL.Host)
   690  	} else {
   691  		hostnames = append(hostnames, hostURL.Host)
   692  	}
   693  
   694  	return externalIPs, internalIPs, hostnames
   695  }
   696  
   697  // GetControlPlaneAddresses returns all IP addresses on which the kubelet can reach the control plane.
   698  // It may return internal and external IPs, even if we expect for
   699  // e.g. internal IPs to be used (issue #56787), so that we can be
   700  // sure to block the control plane fully during tests.
   701  func GetControlPlaneAddresses(ctx context.Context, c clientset.Interface) []string {
   702  	externalIPs, internalIPs, _ := getControlPlaneAddresses(ctx, c)
   703  
   704  	ips := sets.NewString()
   705  	switch TestContext.Provider {
   706  	case "gce", "gke":
   707  		for _, ip := range externalIPs {
   708  			ips.Insert(ip)
   709  		}
   710  		for _, ip := range internalIPs {
   711  			ips.Insert(ip)
   712  		}
   713  	case "aws":
   714  		ips.Insert(awsMasterIP)
   715  	default:
   716  		Failf("This test is not supported for provider %s and should be disabled", TestContext.Provider)
   717  	}
   718  	return ips.List()
   719  }
   720  
   721  // PrettyPrintJSON converts metrics to JSON format.
   722  func PrettyPrintJSON(metrics interface{}) string {
   723  	output := &bytes.Buffer{}
   724  	if err := json.NewEncoder(output).Encode(metrics); err != nil {
   725  		Logf("Error building encoder: %v", err)
   726  		return ""
   727  	}
   728  	formatted := &bytes.Buffer{}
   729  	if err := json.Indent(formatted, output.Bytes(), "", "  "); err != nil {
   730  		Logf("Error indenting: %v", err)
   731  		return ""
   732  	}
   733  	return formatted.String()
   734  }
   735  
   736  // WatchEventSequenceVerifier ...
   737  // manages a watch for a given resource, ensures that events take place in a given order, retries the test on failure
   738  //
   739  //	ctx                 cancellation signal across API boundaries, e.g: context from Ginkgo
   740  //	dc                  sets up a client to the API
   741  //	resourceType        specify the type of resource
   742  //	namespace           select a namespace
   743  //	resourceName        the name of the given resource
   744  //	listOptions         options used to find the resource, recommended to use listOptions.labelSelector
   745  //	expectedWatchEvents array of events which are expected to occur
   746  //	scenario            the test itself
   747  //	retryCleanup        a function to run which ensures that there are no dangling resources upon test failure
   748  //
   749  // this tooling relies on the test to return the events as they occur
   750  // the entire scenario must be run to ensure that the desired watch events arrive in order (allowing for interweaving of watch events)
   751  //
   752  //	if an expected watch event is missing we elect to clean up and run the entire scenario again
   753  //
   754  // we try the scenario three times to allow the sequencing to fail a couple of times
   755  func WatchEventSequenceVerifier(ctx context.Context, dc dynamic.Interface, resourceType schema.GroupVersionResource, namespace string, resourceName string, listOptions metav1.ListOptions, expectedWatchEvents []watch.Event, scenario func(*watchtools.RetryWatcher) []watch.Event, retryCleanup func() error) {
   756  	listWatcher := &cache.ListWatch{
   757  		WatchFunc: func(listOptions metav1.ListOptions) (watch.Interface, error) {
   758  			return dc.Resource(resourceType).Namespace(namespace).Watch(ctx, listOptions)
   759  		},
   760  	}
   761  
   762  	retries := 3
   763  retriesLoop:
   764  	for try := 1; try <= retries; try++ {
   765  		initResource, err := dc.Resource(resourceType).Namespace(namespace).List(ctx, listOptions)
   766  		ExpectNoError(err, "Failed to fetch initial resource")
   767  
   768  		resourceWatch, err := watchtools.NewRetryWatcher(initResource.GetResourceVersion(), listWatcher)
   769  		ExpectNoError(err, "Failed to create a resource watch of %v in namespace %v", resourceType.Resource, namespace)
   770  
   771  		// NOTE the test may need access to the events to see what's going on, such as a change in status
   772  		actualWatchEvents := scenario(resourceWatch)
   773  		errs := sets.NewString()
   774  		gomega.Expect(len(expectedWatchEvents)).To(gomega.BeNumerically("<=", len(actualWatchEvents)), "Did not get enough watch events")
   775  
   776  		totalValidWatchEvents := 0
   777  		foundEventIndexes := map[int]*int{}
   778  
   779  		for watchEventIndex, expectedWatchEvent := range expectedWatchEvents {
   780  			foundExpectedWatchEvent := false
   781  		actualWatchEventsLoop:
   782  			for actualWatchEventIndex, actualWatchEvent := range actualWatchEvents {
   783  				if foundEventIndexes[actualWatchEventIndex] != nil {
   784  					continue actualWatchEventsLoop
   785  				}
   786  				if actualWatchEvent.Type == expectedWatchEvent.Type {
   787  					foundExpectedWatchEvent = true
   788  					foundEventIndexes[actualWatchEventIndex] = &watchEventIndex
   789  					break actualWatchEventsLoop
   790  				}
   791  			}
   792  			if !foundExpectedWatchEvent {
   793  				errs.Insert(fmt.Sprintf("Watch event %v not found", expectedWatchEvent.Type))
   794  			}
   795  			totalValidWatchEvents++
   796  		}
   797  		err = retryCleanup()
   798  		ExpectNoError(err, "Error occurred when cleaning up resources")
   799  		if errs.Len() > 0 && try < retries {
   800  			fmt.Println("invariants violated:\n", strings.Join(errs.List(), "\n - "))
   801  			continue retriesLoop
   802  		}
   803  		if errs.Len() > 0 {
   804  			Failf("Unexpected error(s): %v", strings.Join(errs.List(), "\n - "))
   805  		}
   806  		gomega.Expect(expectedWatchEvents).To(gomega.HaveLen(totalValidWatchEvents), "Error: there must be an equal amount of total valid watch events (%d) and expected watch events (%d)", totalValidWatchEvents, len(expectedWatchEvents))
   807  		break retriesLoop
   808  	}
   809  }