k8s.io/kubernetes@v1.29.3/test/e2e/framework/pod/wait.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pod
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"reflect"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/onsi/ginkgo/v2"
    28  	"github.com/onsi/gomega"
    29  	"github.com/onsi/gomega/gcustom"
    30  	"github.com/onsi/gomega/types"
    31  
    32  	appsv1 "k8s.io/api/apps/v1"
    33  	v1 "k8s.io/api/core/v1"
    34  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	"k8s.io/apimachinery/pkg/labels"
    37  	apitypes "k8s.io/apimachinery/pkg/types"
    38  	clientset "k8s.io/client-go/kubernetes"
    39  	"k8s.io/kubectl/pkg/util/podutils"
    40  	"k8s.io/kubernetes/test/e2e/framework"
    41  	testutils "k8s.io/kubernetes/test/utils"
    42  	"k8s.io/kubernetes/test/utils/format"
    43  )
    44  
    45  const (
    46  	// defaultPodDeletionTimeout is the default timeout for deleting pod.
    47  	defaultPodDeletionTimeout = 3 * time.Minute
    48  
    49  	// podListTimeout is how long to wait for the pod to be listable.
    50  	podListTimeout = time.Minute
    51  
    52  	podRespondingTimeout = 15 * time.Minute
    53  
    54  	// How long pods have to become scheduled onto nodes
    55  	podScheduledBeforeTimeout = podListTimeout + (20 * time.Second)
    56  
    57  	// podStartTimeout is how long to wait for the pod to be started.
    58  	podStartTimeout = 5 * time.Minute
    59  
    60  	// singleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
    61  	// transient failures from failing tests.
    62  	singleCallTimeout = 5 * time.Minute
    63  
    64  	// Some pods can take much longer to get ready due to volume attach/detach latency.
    65  	slowPodStartTimeout = 15 * time.Minute
    66  )
    67  
    68  type podCondition func(pod *v1.Pod) (bool, error)
    69  
    70  // BeRunningNoRetries verifies that a pod starts running. It's a permanent
    71  // failure when the pod enters some other permanent phase.
    72  func BeRunningNoRetries() types.GomegaMatcher {
    73  	return gomega.And(
    74  		// This additional matcher checks for the final error condition.
    75  		gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    76  			switch pod.Status.Phase {
    77  			case v1.PodFailed, v1.PodSucceeded:
    78  				return false, gomega.StopTrying(fmt.Sprintf("Expected pod to reach phase %q, got final phase %q instead.", v1.PodRunning, pod.Status.Phase))
    79  			default:
    80  				return true, nil
    81  			}
    82  		}),
    83  		BeInPhase(v1.PodRunning),
    84  	)
    85  }
    86  
    87  // BeInPhase matches if pod.status.phase is the expected phase.
    88  func BeInPhase(phase v1.PodPhase) types.GomegaMatcher {
    89  	// A simple implementation of this would be:
    90  	// return gomega.HaveField("Status.Phase", phase)
    91  	//
    92  	// But that produces a fairly generic
    93  	//     Value for field 'Status.Phase' failed to satisfy matcher.
    94  	// failure message and doesn't show the pod. We can do better than
    95  	// that with a custom matcher.
    96  
    97  	return gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    98  		return pod.Status.Phase == phase, nil
    99  	}).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase)
   100  }
   101  
   102  // WaitForPodsRunningReady waits up to timeout to ensure that all pods in
   103  // namespace ns are either running and ready, or failed but controlled by a
   104  // controller. Also, it ensures that at least minPods are running and
   105  // ready. It has separate behavior from other 'wait for' pods functions in
   106  // that it requests the list of pods on every iteration. This is useful, for
   107  // example, in cluster startup, because the number of pods increases while
   108  // waiting. All pods that are in SUCCESS state are not counted.
   109  //
   110  // If minPods or allowedNotReadyPods are -1, this method returns immediately
   111  // without waiting.
   112  func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error {
   113  	if minPods == -1 || allowedNotReadyPods == -1 {
   114  		return nil
   115  	}
   116  
   117  	// We get the new list of pods, replication controllers, and replica
   118  	// sets in every iteration because more pods come online during startup
   119  	// and we want to ensure they are also checked.
   120  	//
   121  	// This struct gets populated while polling, then gets checked, and in
   122  	// case of a timeout is included in the failure message.
   123  	type state struct {
   124  		ReplicationControllers []v1.ReplicationController
   125  		ReplicaSets            []appsv1.ReplicaSet
   126  		Pods                   []v1.Pod
   127  	}
   128  
   129  	// notReady is -1 for any failure other than a timeout.
   130  	// Otherwise it is the number of pods that we were still
   131  	// waiting for.
   132  	notReady := int32(-1)
   133  
   134  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
   135  		// Reset notReady at the start of a poll attempt.
   136  		notReady = -1
   137  
   138  		rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
   139  		if err != nil {
   140  			return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err)
   141  		}
   142  		rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{})
   143  		if err != nil {
   144  			return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err)
   145  		}
   146  		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   147  		if err != nil {
   148  			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
   149  		}
   150  		return &state{
   151  			ReplicationControllers: rcList.Items,
   152  			ReplicaSets:            rsList.Items,
   153  			Pods:                   podList.Items,
   154  		}, nil
   155  	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) {
   156  		replicas, replicaOk := int32(0), int32(0)
   157  		for _, rc := range s.ReplicationControllers {
   158  			replicas += *rc.Spec.Replicas
   159  			replicaOk += rc.Status.ReadyReplicas
   160  		}
   161  		for _, rs := range s.ReplicaSets {
   162  			replicas += *rs.Spec.Replicas
   163  			replicaOk += rs.Status.ReadyReplicas
   164  		}
   165  
   166  		nOk := int32(0)
   167  		notReady = int32(0)
   168  		failedPods := []v1.Pod{}
   169  		otherPods := []v1.Pod{}
   170  		succeededPods := []string{}
   171  		for _, pod := range s.Pods {
   172  			res, err := testutils.PodRunningReady(&pod)
   173  			switch {
   174  			case res && err == nil:
   175  				nOk++
   176  			case pod.Status.Phase == v1.PodSucceeded:
   177  				// it doesn't make sense to wait for this pod
   178  				succeededPods = append(succeededPods, pod.Name)
   179  			case pod.Status.Phase == v1.PodFailed:
   180  				// ignore failed pods that are controlled by some controller
   181  				if metav1.GetControllerOf(&pod) == nil {
   182  					failedPods = append(failedPods, pod)
   183  				}
   184  			default:
   185  				notReady++
   186  				otherPods = append(otherPods, pod)
   187  			}
   188  		}
   189  		done := replicaOk == replicas && nOk >= minPods && (len(failedPods)+len(otherPods)) == 0
   190  		if done {
   191  			return nil, nil
   192  		}
   193  
   194  		// Delayed formatting of a failure message.
   195  		return func() string {
   196  			var buffer strings.Builder
   197  			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods))
   198  			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods)))
   199  			buffer.WriteString(fmt.Sprintf("Expected %d pod replicas, %d are Running and Ready.\n", replicas, replicaOk))
   200  			if len(succeededPods) > 0 {
   201  				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
   202  			}
   203  			if len(failedPods) > 0 {
   204  				buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(failedPods, 1)))
   205  			}
   206  			if len(otherPods) > 0 {
   207  				buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
   208  			}
   209  			return buffer.String()
   210  		}, nil
   211  	}))
   212  
   213  	// An error might not be fatal.
   214  	if err != nil && notReady >= 0 && notReady <= allowedNotReadyPods {
   215  		framework.Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
   216  		return nil
   217  	}
   218  	return err
   219  }
   220  
   221  // WaitForPodCondition waits a pods to be matched to the given condition.
   222  // The condition callback may use gomega.StopTrying to abort early.
   223  func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error {
   224  	return framework.Gomega().
   225  		Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(ns).Get, podName, metav1.GetOptions{}))).
   226  		WithTimeout(timeout).
   227  		Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) {
   228  			done, err := condition(pod)
   229  			if err != nil {
   230  				return nil, err
   231  			}
   232  			if done {
   233  				return nil, nil
   234  			}
   235  			return func() string {
   236  				return fmt.Sprintf("expected pod to be %s, got instead:\n%s", conditionDesc, format.Object(pod, 1))
   237  			}, nil
   238  		}))
   239  }
   240  
   241  // Range determines how many items must exist and how many must match a certain
   242  // condition. Values <= 0 are ignored.
   243  // TODO (?): move to test/e2e/framework/range
   244  type Range struct {
   245  	// MinMatching must be <= actual matching items or <= 0.
   246  	MinMatching int
   247  	// MaxMatching must be >= actual matching items or <= 0.
   248  	// To check for "no matching items", set NonMatching.
   249  	MaxMatching int
   250  	// NoneMatching indicates that no item must match.
   251  	NoneMatching bool
   252  	// AllMatching indicates that all items must match.
   253  	AllMatching bool
   254  	// MinFound must be <= existing items or <= 0.
   255  	MinFound int
   256  }
   257  
   258  // Min returns how many items must exist.
   259  func (r Range) Min() int {
   260  	min := r.MinMatching
   261  	if min < r.MinFound {
   262  		min = r.MinFound
   263  	}
   264  	return min
   265  }
   266  
   267  // WaitForPods waits for pods in the given namespace to match the given
   268  // condition. How many pods must exist and how many must match the condition
   269  // is determined by the range parameter. The condition callback may use
   270  // gomega.StopTrying(...).Now() to abort early. The condition description
   271  // will be used with "expected pods to <description>".
   272  func WaitForPods(ctx context.Context, c clientset.Interface, ns string, opts metav1.ListOptions, r Range, timeout time.Duration, conditionDesc string, condition func(*v1.Pod) bool) (*v1.PodList, error) {
   273  	var finalPods *v1.PodList
   274  	minPods := r.Min()
   275  	match := func(pods *v1.PodList) (func() string, error) {
   276  		finalPods = pods
   277  
   278  		if len(pods.Items) < minPods {
   279  			return func() string {
   280  				return fmt.Sprintf("expected at least %d pods, only got %d", minPods, len(pods.Items))
   281  			}, nil
   282  		}
   283  
   284  		var nonMatchingPods, matchingPods []v1.Pod
   285  		for _, pod := range pods.Items {
   286  			if condition(&pod) {
   287  				matchingPods = append(matchingPods, pod)
   288  			} else {
   289  				nonMatchingPods = append(nonMatchingPods, pod)
   290  			}
   291  		}
   292  		matching := len(pods.Items) - len(nonMatchingPods)
   293  		if matching < r.MinMatching && r.MinMatching > 0 {
   294  			return func() string {
   295  				return fmt.Sprintf("expected at least %d pods to %s, %d out of %d were not:\n%s",
   296  					r.MinMatching, conditionDesc, len(nonMatchingPods), len(pods.Items),
   297  					format.Object(nonMatchingPods, 1))
   298  			}, nil
   299  		}
   300  		if len(nonMatchingPods) > 0 && r.AllMatching {
   301  			return func() string {
   302  				return fmt.Sprintf("expected all pods to %s, %d out of %d were not:\n%s",
   303  					conditionDesc, len(nonMatchingPods), len(pods.Items),
   304  					format.Object(nonMatchingPods, 1))
   305  			}, nil
   306  		}
   307  		if matching > r.MaxMatching && r.MaxMatching > 0 {
   308  			return func() string {
   309  				return fmt.Sprintf("expected at most %d pods to %s, %d out of %d were:\n%s",
   310  					r.MinMatching, conditionDesc, len(matchingPods), len(pods.Items),
   311  					format.Object(matchingPods, 1))
   312  			}, nil
   313  		}
   314  		if matching > 0 && r.NoneMatching {
   315  			return func() string {
   316  				return fmt.Sprintf("expected no pods to %s, %d out of %d were:\n%s",
   317  					conditionDesc, len(matchingPods), len(pods.Items),
   318  					format.Object(matchingPods, 1))
   319  			}, nil
   320  		}
   321  		return nil, nil
   322  	}
   323  
   324  	err := framework.Gomega().
   325  		Eventually(ctx, framework.ListObjects(c.CoreV1().Pods(ns).List, opts)).
   326  		WithTimeout(timeout).
   327  		Should(framework.MakeMatcher(match))
   328  	return finalPods, err
   329  }
   330  
   331  // RunningReady checks whether pod p's phase is running and it has a ready
   332  // condition of status true.
   333  func RunningReady(p *v1.Pod) bool {
   334  	return p.Status.Phase == v1.PodRunning && podutils.IsPodReady(p)
   335  }
   336  
   337  // WaitForPodsRunning waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` are running.
   338  func WaitForPodsRunning(c clientset.Interface, ns string, num int, timeout time.Duration) error {
   339  	_, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   340  		"be running and ready", func(pod *v1.Pod) bool {
   341  			ready, _ := testutils.PodRunningReady(pod)
   342  			return ready
   343  		})
   344  	return err
   345  }
   346  
   347  // WaitForPodsSchedulingGated waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` stay in scheduling gated state.
   348  func WaitForPodsSchedulingGated(c clientset.Interface, ns string, num int, timeout time.Duration) error {
   349  	_, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   350  		"be in scheduling gated state", func(pod *v1.Pod) bool {
   351  			for _, condition := range pod.Status.Conditions {
   352  				if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonSchedulingGated {
   353  					return true
   354  				}
   355  			}
   356  			return false
   357  		})
   358  	return err
   359  }
   360  
   361  // WaitForPodsWithSchedulingGates waits for a given `timeout` to evaluate if a certain amount of pods in given `ns`
   362  // match the given `schedulingGates`stay in scheduling gated state.
   363  func WaitForPodsWithSchedulingGates(c clientset.Interface, ns string, num int, timeout time.Duration, schedulingGates []v1.PodSchedulingGate) error {
   364  	_, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   365  		"have certain scheduling gates", func(pod *v1.Pod) bool {
   366  			return reflect.DeepEqual(pod.Spec.SchedulingGates, schedulingGates)
   367  		})
   368  	return err
   369  }
   370  
   371  // WaitForPodTerminatedInNamespace returns an error if it takes too long for the pod to terminate,
   372  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed (and thus did not
   373  // terminate) with an unexpected reason. Typically called to test that the passed-in pod is fully
   374  // terminated (reason==""), but may be called to detect if a pod did *not* terminate according to
   375  // the supplied reason.
   376  func WaitForPodTerminatedInNamespace(ctx context.Context, c clientset.Interface, podName, reason, namespace string) error {
   377  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("terminated with reason %s", reason), podStartTimeout, func(pod *v1.Pod) (bool, error) {
   378  		// Only consider Failed pods. Successful pods will be deleted and detected in
   379  		// waitForPodCondition's Get call returning `IsNotFound`
   380  		if pod.Status.Phase == v1.PodFailed {
   381  			if pod.Status.Reason == reason { // short-circuit waitForPodCondition's loop
   382  				return true, nil
   383  			}
   384  			return true, fmt.Errorf("Expected pod %q in namespace %q to be terminated with reason %q, got reason: %q", podName, namespace, reason, pod.Status.Reason)
   385  		}
   386  		return false, nil
   387  	})
   388  }
   389  
   390  // WaitForPodTerminatingInNamespaceTimeout returns if the pod is terminating, or an error if it is not after the timeout.
   391  func WaitForPodTerminatingInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   392  	return WaitForPodCondition(ctx, c, namespace, podName, "is terminating", timeout, func(pod *v1.Pod) (bool, error) {
   393  		if pod.DeletionTimestamp != nil {
   394  			return true, nil
   395  		}
   396  		return false, nil
   397  	})
   398  }
   399  
   400  // WaitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long.
   401  func WaitForPodSuccessInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   402  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("%s or %s", v1.PodSucceeded, v1.PodFailed), timeout, func(pod *v1.Pod) (bool, error) {
   403  		if pod.DeletionTimestamp == nil && pod.Spec.RestartPolicy == v1.RestartPolicyAlways {
   404  			return true, fmt.Errorf("pod %q will never terminate with a succeeded state since its restart policy is Always", podName)
   405  		}
   406  		switch pod.Status.Phase {
   407  		case v1.PodSucceeded:
   408  			ginkgo.By("Saw pod success")
   409  			return true, nil
   410  		case v1.PodFailed:
   411  			return true, fmt.Errorf("pod %q failed with status: %+v", podName, pod.Status)
   412  		default:
   413  			return false, nil
   414  		}
   415  	})
   416  }
   417  
   418  // WaitForPodNameUnschedulableInNamespace returns an error if it takes too long for the pod to become Pending
   419  // and have condition Status equal to Unschedulable,
   420  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed with an unexpected reason.
   421  // Typically called to test that the passed-in pod is Pending and Unschedulable.
   422  func WaitForPodNameUnschedulableInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   423  	return WaitForPodCondition(ctx, c, namespace, podName, v1.PodReasonUnschedulable, podStartTimeout, func(pod *v1.Pod) (bool, error) {
   424  		// Only consider Failed pods. Successful pods will be deleted and detected in
   425  		// waitForPodCondition's Get call returning `IsNotFound`
   426  		if pod.Status.Phase == v1.PodPending {
   427  			for _, cond := range pod.Status.Conditions {
   428  				if cond.Type == v1.PodScheduled && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable {
   429  					return true, nil
   430  				}
   431  			}
   432  		}
   433  		if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
   434  			return true, fmt.Errorf("Expected pod %q in namespace %q to be in phase Pending, but got phase: %v", podName, namespace, pod.Status.Phase)
   435  		}
   436  		return false, nil
   437  	})
   438  }
   439  
   440  // WaitForPodNameRunningInNamespace waits default amount of time (PodStartTimeout) for the specified pod to become running.
   441  // Returns an error if timeout occurs first, or pod goes in to failed state.
   442  func WaitForPodNameRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   443  	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, podStartTimeout)
   444  }
   445  
   446  // WaitForPodRunningInNamespaceSlow waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running.
   447  // The resourceVersion is used when Watching object changes, it tells since when we care
   448  // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state.
   449  func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   450  	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, slowPodStartTimeout)
   451  }
   452  
   453  // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running.
   454  // It does not need to exist yet when this function gets called and the pod is not expected to be recreated
   455  // when it succeeds or fails.
   456  func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   457  	return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))).
   458  		WithTimeout(timeout).
   459  		Should(BeRunningNoRetries())
   460  }
   461  
   462  // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running.
   463  // Returns an error if timeout occurs first, or pod goes in to failed state.
   464  func WaitForPodRunningInNamespace(ctx context.Context, c clientset.Interface, pod *v1.Pod) error {
   465  	if pod.Status.Phase == v1.PodRunning {
   466  		return nil
   467  	}
   468  	return WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, podStartTimeout)
   469  }
   470  
   471  // WaitTimeoutForPodNoLongerRunningInNamespace waits the given timeout duration for the specified pod to stop.
   472  func WaitTimeoutForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   473  	return WaitForPodCondition(ctx, c, namespace, podName, "completed", timeout, func(pod *v1.Pod) (bool, error) {
   474  		switch pod.Status.Phase {
   475  		case v1.PodFailed, v1.PodSucceeded:
   476  			return true, nil
   477  		}
   478  		return false, nil
   479  	})
   480  }
   481  
   482  // WaitForPodNoLongerRunningInNamespace waits default amount of time (defaultPodDeletionTimeout) for the specified pod to stop running.
   483  // Returns an error if timeout occurs first.
   484  func WaitForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   485  	return WaitTimeoutForPodNoLongerRunningInNamespace(ctx, c, podName, namespace, defaultPodDeletionTimeout)
   486  }
   487  
   488  // WaitTimeoutForPodReadyInNamespace waits the given timeout duration for the
   489  // specified pod to be ready and running.
   490  func WaitTimeoutForPodReadyInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   491  	return WaitForPodCondition(ctx, c, namespace, podName, "running and ready", timeout, func(pod *v1.Pod) (bool, error) {
   492  		switch pod.Status.Phase {
   493  		case v1.PodFailed, v1.PodSucceeded:
   494  			return false, gomega.StopTrying(fmt.Sprintf("The phase of Pod %s is %s which is unexpected.", pod.Name, pod.Status.Phase))
   495  		case v1.PodRunning:
   496  			return podutils.IsPodReady(pod), nil
   497  		}
   498  		return false, nil
   499  	})
   500  }
   501  
   502  // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state.
   503  // The resourceVersion is used when Watching object changes, it tells since when we care
   504  // about changes to the pod.
   505  func WaitForPodNotPending(ctx context.Context, c clientset.Interface, ns, podName string) error {
   506  	return WaitForPodCondition(ctx, c, ns, podName, "not pending", podStartTimeout, func(pod *v1.Pod) (bool, error) {
   507  		switch pod.Status.Phase {
   508  		case v1.PodPending:
   509  			return false, nil
   510  		default:
   511  			return true, nil
   512  		}
   513  	})
   514  }
   515  
   516  // WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or until podStartupTimeout.
   517  func WaitForPodSuccessInNamespace(ctx context.Context, c clientset.Interface, podName string, namespace string) error {
   518  	return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, podStartTimeout)
   519  }
   520  
   521  // WaitForPodSuccessInNamespaceSlow returns nil if the pod reached state success, or an error if it reached failure or until slowPodStartupTimeout.
   522  func WaitForPodSuccessInNamespaceSlow(ctx context.Context, c clientset.Interface, podName string, namespace string) error {
   523  	return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, slowPodStartTimeout)
   524  }
   525  
   526  // WaitForPodNotFoundInNamespace returns an error if it takes too long for the pod to fully terminate.
   527  // Unlike `waitForPodTerminatedInNamespace`, the pod's Phase and Reason are ignored. If the pod Get
   528  // api returns IsNotFound then the wait stops and nil is returned. If the Get api returns an error other
   529  // than "not found" and that error is final, that error is returned and the wait stops.
   530  func WaitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, podName, ns string, timeout time.Duration) error {
   531  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*v1.Pod, error) {
   532  		pod, err := c.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
   533  		if apierrors.IsNotFound(err) {
   534  			return nil, nil
   535  		}
   536  		return pod, err
   537  	})).WithTimeout(timeout).Should(gomega.BeNil())
   538  	if err != nil {
   539  		return fmt.Errorf("expected pod to not be found: %w", err)
   540  	}
   541  	return nil
   542  }
   543  
   544  // WaitForPodsResponding waits for the pods to response.
   545  func WaitForPodsResponding(ctx context.Context, c clientset.Interface, ns string, controllerName string, wantName bool, timeout time.Duration, pods *v1.PodList) error {
   546  	if timeout == 0 {
   547  		timeout = podRespondingTimeout
   548  	}
   549  	ginkgo.By("trying to dial each unique pod")
   550  	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": controllerName}))
   551  	options := metav1.ListOptions{LabelSelector: label.String()}
   552  
   553  	type response struct {
   554  		podName  string
   555  		response string
   556  	}
   557  
   558  	get := func(ctx context.Context) ([]response, error) {
   559  		currentPods, err := c.CoreV1().Pods(ns).List(ctx, options)
   560  		if err != nil {
   561  			return nil, fmt.Errorf("list pods: %w", err)
   562  		}
   563  
   564  		var responses []response
   565  		for _, pod := range pods.Items {
   566  			// Check that the replica list remains unchanged, otherwise we have problems.
   567  			if !isElementOf(pod.UID, currentPods) {
   568  				return nil, gomega.StopTrying(fmt.Sprintf("Pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason.\nCurrent replica set:\n%s", pod.UID, format.Object(currentPods, 1)))
   569  			}
   570  
   571  			ctxUntil, cancel := context.WithTimeout(ctx, singleCallTimeout)
   572  			defer cancel()
   573  
   574  			body, err := c.CoreV1().RESTClient().Get().
   575  				Namespace(ns).
   576  				Resource("pods").
   577  				SubResource("proxy").
   578  				Name(string(pod.Name)).
   579  				Do(ctxUntil).
   580  				Raw()
   581  
   582  			if err != nil {
   583  				// We may encounter errors here because of a race between the pod readiness and apiserver
   584  				// proxy or because of temporary failures. The error gets wrapped for framework.HandleRetry.
   585  				// Gomega+Ginkgo will handle logging.
   586  				return nil, fmt.Errorf("controller %s: failed to Get from replica pod %s:\n%w\nPod status:\n%s",
   587  					controllerName, pod.Name,
   588  					err, format.Object(pod.Status, 1))
   589  			}
   590  			responses = append(responses, response{podName: pod.Name, response: string(body)})
   591  		}
   592  		return responses, nil
   593  	}
   594  
   595  	match := func(responses []response) (func() string, error) {
   596  		// The response checker expects the pod's name unless !respondName, in
   597  		// which case it just checks for a non-empty response.
   598  		var unexpected []response
   599  		for _, response := range responses {
   600  			if wantName {
   601  				if response.response != response.podName {
   602  					unexpected = append(unexpected, response)
   603  				}
   604  			} else {
   605  				if len(response.response) == 0 {
   606  					unexpected = append(unexpected, response)
   607  				}
   608  			}
   609  		}
   610  		if len(unexpected) > 0 {
   611  			return func() string {
   612  				what := "some response"
   613  				if wantName {
   614  					what = "the pod's own name as response"
   615  				}
   616  				return fmt.Sprintf("Wanted %s, but the following pods replied with something else:\n%s", what, format.Object(unexpected, 1))
   617  			}, nil
   618  		}
   619  		return nil, nil
   620  	}
   621  
   622  	err := framework.Gomega().
   623  		Eventually(ctx, framework.HandleRetry(get)).
   624  		WithTimeout(timeout).
   625  		Should(framework.MakeMatcher(match))
   626  	if err != nil {
   627  		return fmt.Errorf("checking pod responses: %w", err)
   628  	}
   629  	return nil
   630  }
   631  
   632  func isElementOf(podUID apitypes.UID, pods *v1.PodList) bool {
   633  	for _, pod := range pods.Items {
   634  		if pod.UID == podUID {
   635  			return true
   636  		}
   637  	}
   638  	return false
   639  }
   640  
   641  // WaitForNumberOfPods waits up to timeout to ensure there are exact
   642  // `num` pods in namespace `ns`.
   643  // It returns the matching Pods or a timeout error.
   644  func WaitForNumberOfPods(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   645  	return WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, podScheduledBeforeTimeout, "exist", func(pod *v1.Pod) bool {
   646  		return true
   647  	})
   648  }
   649  
   650  // WaitForPodsWithLabelScheduled waits for all matching pods to become scheduled and at least one
   651  // matching pod exists.  Return the list of matching pods.
   652  func WaitForPodsWithLabelScheduled(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (pods *v1.PodList, err error) {
   653  	opts := metav1.ListOptions{LabelSelector: label.String()}
   654  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1, AllMatching: true}, podScheduledBeforeTimeout, "be scheduled", func(pod *v1.Pod) bool {
   655  		return pod.Spec.NodeName != ""
   656  	})
   657  }
   658  
   659  // WaitForPodsWithLabel waits up to podListTimeout for getting pods with certain label
   660  func WaitForPodsWithLabel(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (*v1.PodList, error) {
   661  	opts := metav1.ListOptions{LabelSelector: label.String()}
   662  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1}, podListTimeout, "exist", func(pod *v1.Pod) bool {
   663  		return true
   664  	})
   665  }
   666  
   667  // WaitForPodsWithLabelRunningReady waits for exact amount of matching pods to become running and ready.
   668  // Return the list of matching pods.
   669  func WaitForPodsWithLabelRunningReady(ctx context.Context, c clientset.Interface, ns string, label labels.Selector, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   670  	opts := metav1.ListOptions{LabelSelector: label.String()}
   671  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: num, AllMatching: true}, timeout, "be running and ready", RunningReady)
   672  }
   673  
   674  // WaitForNRestartablePods tries to list restarting pods using ps until it finds expect of them,
   675  // returning their names if it can do so before timeout.
   676  func WaitForNRestartablePods(ctx context.Context, ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) {
   677  	var pods []*v1.Pod
   678  
   679  	get := func(ctx context.Context) ([]*v1.Pod, error) {
   680  		return ps.List(), nil
   681  	}
   682  
   683  	match := func(allPods []*v1.Pod) (func() string, error) {
   684  		pods = FilterNonRestartablePods(allPods)
   685  		if len(pods) != expect {
   686  			return func() string {
   687  				return fmt.Sprintf("expected to find non-restartable %d pods, but found %d:\n%s", expect, len(pods), format.Object(pods, 1))
   688  			}, nil
   689  		}
   690  		return nil, nil
   691  	}
   692  
   693  	err := framework.Gomega().
   694  		Eventually(ctx, framework.HandleRetry(get)).
   695  		WithTimeout(timeout).
   696  		Should(framework.MakeMatcher(match))
   697  	if err != nil {
   698  		return nil, err
   699  	}
   700  
   701  	podNames := make([]string, len(pods))
   702  	for i, p := range pods {
   703  		podNames[i] = p.Name
   704  	}
   705  	return podNames, nil
   706  }
   707  
   708  // WaitForPodContainerToFail waits for the given Pod container to fail with the given reason, specifically due to
   709  // invalid container configuration. In this case, the container will remain in a waiting state with a specific
   710  // reason set, which should match the given reason.
   711  func WaitForPodContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error {
   712  	conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason)
   713  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   714  		switch pod.Status.Phase {
   715  		case v1.PodPending:
   716  			if len(pod.Status.ContainerStatuses) == 0 {
   717  				return false, nil
   718  			}
   719  			containerStatus := pod.Status.ContainerStatuses[containerIndex]
   720  			if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason {
   721  				return true, nil
   722  			}
   723  			return false, nil
   724  		case v1.PodFailed, v1.PodRunning, v1.PodSucceeded:
   725  			return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase)
   726  		}
   727  		return false, nil
   728  	})
   729  }
   730  
   731  // WaitForPodScheduled waits for the pod to be schedule, ie. the .spec.nodeName is set
   732  func WaitForPodScheduled(ctx context.Context, c clientset.Interface, namespace, podName string) error {
   733  	return WaitForPodCondition(ctx, c, namespace, podName, "pod is scheduled", podScheduledBeforeTimeout, func(pod *v1.Pod) (bool, error) {
   734  		return pod.Spec.NodeName != "", nil
   735  	})
   736  }
   737  
   738  // WaitForPodContainerStarted waits for the given Pod container to start, after a successful run of the startupProbe.
   739  func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, timeout time.Duration) error {
   740  	conditionDesc := fmt.Sprintf("container %d started", containerIndex)
   741  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   742  		if containerIndex > len(pod.Status.ContainerStatuses)-1 {
   743  			return false, nil
   744  		}
   745  		containerStatus := pod.Status.ContainerStatuses[containerIndex]
   746  		return *containerStatus.Started, nil
   747  	})
   748  }
   749  
   750  // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden".
   751  func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error {
   752  	conditionDesc := fmt.Sprintf("failed with reason %s", reason)
   753  	return WaitForPodCondition(ctx, c, pod.Namespace, pod.Name, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   754  		switch pod.Status.Phase {
   755  		case v1.PodSucceeded:
   756  			return true, errors.New("pod succeeded unexpectedly")
   757  		case v1.PodFailed:
   758  			if pod.Status.Reason == reason {
   759  				return true, nil
   760  			} else {
   761  				return true, fmt.Errorf("pod failed with reason %s", pod.Status.Reason)
   762  			}
   763  		}
   764  		return false, nil
   765  	})
   766  }
   767  
   768  // WaitForContainerRunning waits for the given Pod container to have a state of running
   769  func WaitForContainerRunning(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error {
   770  	conditionDesc := fmt.Sprintf("container %s running", containerName)
   771  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   772  		for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
   773  			for _, cs := range statuses {
   774  				if cs.Name == containerName {
   775  					return cs.State.Running != nil, nil
   776  				}
   777  			}
   778  		}
   779  		return false, nil
   780  	})
   781  }