k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/framework/pod/wait.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pod
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"reflect"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/onsi/ginkgo/v2"
    28  	"github.com/onsi/gomega"
    29  	"github.com/onsi/gomega/gcustom"
    30  	"github.com/onsi/gomega/types"
    31  
    32  	appsv1 "k8s.io/api/apps/v1"
    33  	v1 "k8s.io/api/core/v1"
    34  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	"k8s.io/apimachinery/pkg/labels"
    37  	apitypes "k8s.io/apimachinery/pkg/types"
    38  	clientset "k8s.io/client-go/kubernetes"
    39  	"k8s.io/kubectl/pkg/util/podutils"
    40  	"k8s.io/kubernetes/test/e2e/framework"
    41  	testutils "k8s.io/kubernetes/test/utils"
    42  	"k8s.io/kubernetes/test/utils/format"
    43  )
    44  
    45  const (
    46  	// defaultPodDeletionTimeout is the default timeout for deleting pod.
    47  	defaultPodDeletionTimeout = 3 * time.Minute
    48  
    49  	// podListTimeout is how long to wait for the pod to be listable.
    50  	podListTimeout = time.Minute
    51  
    52  	podRespondingTimeout = 15 * time.Minute
    53  
    54  	// How long pods have to become scheduled onto nodes
    55  	podScheduledBeforeTimeout = podListTimeout + (20 * time.Second)
    56  
    57  	// podStartTimeout is how long to wait for the pod to be started.
    58  	podStartTimeout = 5 * time.Minute
    59  
    60  	// singleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent
    61  	// transient failures from failing tests.
    62  	singleCallTimeout = 5 * time.Minute
    63  
    64  	// Some pods can take much longer to get ready due to volume attach/detach latency.
    65  	slowPodStartTimeout = 15 * time.Minute
    66  )
    67  
    68  type podCondition func(pod *v1.Pod) (bool, error)
    69  
    70  // BeRunningNoRetries verifies that a pod starts running. It's a permanent
    71  // failure when the pod enters some other permanent phase.
    72  func BeRunningNoRetries() types.GomegaMatcher {
    73  	return gomega.And(
    74  		// This additional matcher checks for the final error condition.
    75  		gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    76  			switch pod.Status.Phase {
    77  			case v1.PodFailed, v1.PodSucceeded:
    78  				return false, gomega.StopTrying(fmt.Sprintf("Expected pod to reach phase %q, got final phase %q instead:\n%s", v1.PodRunning, pod.Status.Phase, format.Object(pod, 1)))
    79  			default:
    80  				return true, nil
    81  			}
    82  		}),
    83  		BeInPhase(v1.PodRunning),
    84  	)
    85  }
    86  
    87  // BeInPhase matches if pod.status.phase is the expected phase.
    88  func BeInPhase(phase v1.PodPhase) types.GomegaMatcher {
    89  	// A simple implementation of this would be:
    90  	// return gomega.HaveField("Status.Phase", phase)
    91  	//
    92  	// But that produces a fairly generic
    93  	//     Value for field 'Status.Phase' failed to satisfy matcher.
    94  	// failure message and doesn't show the pod. We can do better than
    95  	// that with a custom matcher.
    96  
    97  	return gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) {
    98  		return pod.Status.Phase == phase, nil
    99  	}).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase)
   100  }
   101  
   102  // WaitForAlmostAllReady waits up to timeout for the following conditions:
   103  // 1. At least minPods Pods in Namespace ns are Running and Ready
   104  // 2. All Pods in Namespace ns are either Ready or Succeeded
   105  // 3. All Pods part of a ReplicaSet or ReplicationController in Namespace ns are Ready
   106  //
   107  // After the timeout has elapsed, an error is returned if the number of Pods in a Pending Phase
   108  // is greater than allowedNotReadyPods.
   109  //
   110  // It is generally recommended to use WaitForPodsRunningReady instead of this function
   111  // whenever possible, because its behavior is more intuitive. Similar to WaitForPodsRunningReady,
   112  // this function requests the list of pods on every iteration, making it useful for situations
   113  // where the set of Pods is likely changing, such as during cluster startup.
   114  //
   115  // If minPods or allowedNotReadyPods are -1, this method returns immediately
   116  // without waiting.
   117  func WaitForAlmostAllPodsReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int, timeout time.Duration) error {
   118  	if minPods == -1 || allowedNotReadyPods == -1 {
   119  		return nil
   120  	}
   121  
   122  	// We get the new list of pods, replication controllers, and replica
   123  	// sets in every iteration because more pods come online during startup
   124  	// and we want to ensure they are also checked.
   125  	//
   126  	// This struct gets populated while polling, then gets checked, and in
   127  	// case of a timeout is included in the failure message.
   128  	type state struct {
   129  		ReplicationControllers []v1.ReplicationController
   130  		ReplicaSets            []appsv1.ReplicaSet
   131  		Pods                   []v1.Pod
   132  	}
   133  
   134  	nOk := 0
   135  	badPods := []v1.Pod{}
   136  	otherPods := []v1.Pod{}
   137  	succeededPods := []string{}
   138  
   139  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) {
   140  
   141  		rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{})
   142  		if err != nil {
   143  			return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err)
   144  		}
   145  		rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{})
   146  		if err != nil {
   147  			return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err)
   148  		}
   149  		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   150  		if err != nil {
   151  			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
   152  		}
   153  		return &state{
   154  			ReplicationControllers: rcList.Items,
   155  			ReplicaSets:            rsList.Items,
   156  			Pods:                   podList.Items,
   157  		}, nil
   158  	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) {
   159  		replicas, replicaOk := int32(0), int32(0)
   160  		for _, rc := range s.ReplicationControllers {
   161  			replicas += *rc.Spec.Replicas
   162  			replicaOk += rc.Status.ReadyReplicas
   163  		}
   164  		for _, rs := range s.ReplicaSets {
   165  			replicas += *rs.Spec.Replicas
   166  			replicaOk += rs.Status.ReadyReplicas
   167  		}
   168  
   169  		nOk = 0
   170  		badPods = []v1.Pod{}
   171  		otherPods = []v1.Pod{}
   172  		succeededPods = []string{}
   173  		for _, pod := range s.Pods {
   174  			res, err := testutils.PodRunningReady(&pod)
   175  			switch {
   176  			case res && err == nil:
   177  				nOk++
   178  			case pod.Status.Phase == v1.PodSucceeded:
   179  				// it doesn't make sense to wait for this pod
   180  				succeededPods = append(succeededPods, pod.Name)
   181  			case pod.Status.Phase == v1.PodFailed:
   182  				// ignore failed pods that are controlled by some controller
   183  				if metav1.GetControllerOf(&pod) == nil {
   184  					badPods = append(badPods, pod)
   185  				}
   186  			default:
   187  				otherPods = append(otherPods, pod)
   188  			}
   189  		}
   190  		done := replicaOk == replicas && nOk >= minPods && (len(badPods)+len(otherPods)) == 0
   191  		if done {
   192  			return nil, nil
   193  		}
   194  
   195  		// Delayed formatting of a failure message.
   196  		return func() string {
   197  			var buffer strings.Builder
   198  			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods))
   199  			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods)))
   200  			buffer.WriteString(fmt.Sprintf("Expected %d pod replicas, %d are Running and Ready.\n", replicas, replicaOk))
   201  			if len(succeededPods) > 0 {
   202  				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
   203  			}
   204  			if len(badPods) > 0 {
   205  				buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1)))
   206  			}
   207  			if len(otherPods) > 0 {
   208  				buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
   209  			}
   210  			return buffer.String()
   211  		}, nil
   212  	}))
   213  
   214  	// An error might not be fatal.
   215  	if len(otherPods) <= allowedNotReadyPods {
   216  		return nil
   217  	}
   218  	return err
   219  }
   220  
   221  // WaitForPodsRunningReady waits up to timeout for the following conditions:
   222  //  1. At least minPods Pods in Namespace ns are Running and Ready
   223  //  2. No Pods in Namespace ns are Failed and not owned by a controller or Pending
   224  //
   225  // An error is returned if either of these conditions are not met within the timeout.
   226  //
   227  // It has separate behavior from other 'wait for' pods functions in
   228  // that it requests the list of pods on every iteration. This is useful, for
   229  // example, in cluster startup, because the number of pods increases while
   230  // waiting. All pods that are in SUCCESS state are not counted.
   231  func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods int, timeout time.Duration) error {
   232  
   233  	return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) ([]v1.Pod, error) {
   234  
   235  		podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   236  		if err != nil {
   237  			return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err)
   238  		}
   239  		return podList.Items, nil
   240  	})).WithTimeout(timeout).Should(framework.MakeMatcher(func(pods []v1.Pod) (func() string, error) {
   241  
   242  		nOk := 0
   243  		badPods := []v1.Pod{}
   244  		otherPods := []v1.Pod{}
   245  		succeededPods := []string{}
   246  
   247  		for _, pod := range pods {
   248  			res, err := testutils.PodRunningReady(&pod)
   249  			switch {
   250  			case res && err == nil:
   251  				nOk++
   252  			case pod.Status.Phase == v1.PodSucceeded:
   253  				// ignore succeeded pods
   254  				succeededPods = append(succeededPods, pod.Name)
   255  			case pod.Status.Phase == v1.PodFailed:
   256  				// ignore failed pods that are controlled by some controller
   257  				if metav1.GetControllerOf(&pod) == nil {
   258  					badPods = append(badPods, pod)
   259  				}
   260  			default:
   261  				otherPods = append(otherPods, pod)
   262  			}
   263  		}
   264  		if nOk >= minPods && len(badPods)+len(otherPods) == 0 {
   265  			return nil, nil
   266  		}
   267  
   268  		// Delayed formatting of a failure message.
   269  		return func() string {
   270  			var buffer strings.Builder
   271  			buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready \n", minPods, ns))
   272  			buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(pods)))
   273  			if len(succeededPods) > 0 {
   274  				buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1)))
   275  			}
   276  			if len(badPods) > 0 {
   277  				buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1)))
   278  			}
   279  			if len(otherPods) > 0 {
   280  				buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1)))
   281  			}
   282  			return buffer.String()
   283  		}, nil
   284  	}))
   285  
   286  }
   287  
   288  // WaitForPodCondition waits a pods to be matched to the given condition.
   289  // The condition callback may use gomega.StopTrying to abort early.
   290  func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error {
   291  	return framework.Gomega().
   292  		Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(ns).Get, podName, metav1.GetOptions{}))).
   293  		WithTimeout(timeout).
   294  		Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) {
   295  			done, err := condition(pod)
   296  			if err != nil {
   297  				return nil, err
   298  			}
   299  			if done {
   300  				return nil, nil
   301  			}
   302  			return func() string {
   303  				return fmt.Sprintf("expected pod to be %s, got instead:\n%s", conditionDesc, format.Object(pod, 1))
   304  			}, nil
   305  		}))
   306  }
   307  
   308  // Range determines how many items must exist and how many must match a certain
   309  // condition. Values <= 0 are ignored.
   310  // TODO (?): move to test/e2e/framework/range
   311  type Range struct {
   312  	// MinMatching must be <= actual matching items or <= 0.
   313  	MinMatching int
   314  	// MaxMatching must be >= actual matching items or <= 0.
   315  	// To check for "no matching items", set NonMatching.
   316  	MaxMatching int
   317  	// NoneMatching indicates that no item must match.
   318  	NoneMatching bool
   319  	// AllMatching indicates that all items must match.
   320  	AllMatching bool
   321  	// MinFound must be <= existing items or <= 0.
   322  	MinFound int
   323  }
   324  
   325  // Min returns how many items must exist.
   326  func (r Range) Min() int {
   327  	min := r.MinMatching
   328  	if min < r.MinFound {
   329  		min = r.MinFound
   330  	}
   331  	return min
   332  }
   333  
   334  // WaitForPods waits for pods in the given namespace to match the given
   335  // condition. How many pods must exist and how many must match the condition
   336  // is determined by the range parameter. The condition callback may use
   337  // gomega.StopTrying(...).Now() to abort early. The condition description
   338  // will be used with "expected pods to <description>".
   339  func WaitForPods(ctx context.Context, c clientset.Interface, ns string, opts metav1.ListOptions, r Range, timeout time.Duration, conditionDesc string, condition func(*v1.Pod) bool) (*v1.PodList, error) {
   340  	var finalPods *v1.PodList
   341  	minPods := r.Min()
   342  	match := func(pods *v1.PodList) (func() string, error) {
   343  		finalPods = pods
   344  
   345  		if len(pods.Items) < minPods {
   346  			return func() string {
   347  				return fmt.Sprintf("expected at least %d pods, only got %d", minPods, len(pods.Items))
   348  			}, nil
   349  		}
   350  
   351  		var nonMatchingPods, matchingPods []v1.Pod
   352  		for _, pod := range pods.Items {
   353  			if condition(&pod) {
   354  				matchingPods = append(matchingPods, pod)
   355  			} else {
   356  				nonMatchingPods = append(nonMatchingPods, pod)
   357  			}
   358  		}
   359  		matching := len(pods.Items) - len(nonMatchingPods)
   360  		if matching < r.MinMatching && r.MinMatching > 0 {
   361  			return func() string {
   362  				return fmt.Sprintf("expected at least %d pods to %s, %d out of %d were not:\n%s",
   363  					r.MinMatching, conditionDesc, len(nonMatchingPods), len(pods.Items),
   364  					format.Object(nonMatchingPods, 1))
   365  			}, nil
   366  		}
   367  		if len(nonMatchingPods) > 0 && r.AllMatching {
   368  			return func() string {
   369  				return fmt.Sprintf("expected all pods to %s, %d out of %d were not:\n%s",
   370  					conditionDesc, len(nonMatchingPods), len(pods.Items),
   371  					format.Object(nonMatchingPods, 1))
   372  			}, nil
   373  		}
   374  		if matching > r.MaxMatching && r.MaxMatching > 0 {
   375  			return func() string {
   376  				return fmt.Sprintf("expected at most %d pods to %s, %d out of %d were:\n%s",
   377  					r.MinMatching, conditionDesc, len(matchingPods), len(pods.Items),
   378  					format.Object(matchingPods, 1))
   379  			}, nil
   380  		}
   381  		if matching > 0 && r.NoneMatching {
   382  			return func() string {
   383  				return fmt.Sprintf("expected no pods to %s, %d out of %d were:\n%s",
   384  					conditionDesc, len(matchingPods), len(pods.Items),
   385  					format.Object(matchingPods, 1))
   386  			}, nil
   387  		}
   388  		return nil, nil
   389  	}
   390  
   391  	err := framework.Gomega().
   392  		Eventually(ctx, framework.ListObjects(c.CoreV1().Pods(ns).List, opts)).
   393  		WithTimeout(timeout).
   394  		Should(framework.MakeMatcher(match))
   395  	return finalPods, err
   396  }
   397  
   398  // RunningReady checks whether pod p's phase is running and it has a ready
   399  // condition of status true.
   400  func RunningReady(p *v1.Pod) bool {
   401  	return p.Status.Phase == v1.PodRunning && podutils.IsPodReady(p)
   402  }
   403  
   404  // WaitForPodsRunning waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` are running.
   405  func WaitForPodsRunning(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error {
   406  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   407  		"be running and ready", func(pod *v1.Pod) bool {
   408  			ready, _ := testutils.PodRunningReady(pod)
   409  			return ready
   410  		})
   411  	return err
   412  }
   413  
   414  // WaitForPodsSchedulingGated waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` stay in scheduling gated state.
   415  func WaitForPodsSchedulingGated(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error {
   416  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   417  		"be in scheduling gated state", func(pod *v1.Pod) bool {
   418  			for _, condition := range pod.Status.Conditions {
   419  				if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonSchedulingGated {
   420  					return true
   421  				}
   422  			}
   423  			return false
   424  		})
   425  	return err
   426  }
   427  
   428  // WaitForPodsWithSchedulingGates waits for a given `timeout` to evaluate if a certain amount of pods in given `ns`
   429  // match the given `schedulingGates`stay in scheduling gated state.
   430  func WaitForPodsWithSchedulingGates(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration, schedulingGates []v1.PodSchedulingGate) error {
   431  	_, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout,
   432  		"have certain scheduling gates", func(pod *v1.Pod) bool {
   433  			return reflect.DeepEqual(pod.Spec.SchedulingGates, schedulingGates)
   434  		})
   435  	return err
   436  }
   437  
   438  // WaitForPodTerminatedInNamespace returns an error if it takes too long for the pod to terminate,
   439  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed (and thus did not
   440  // terminate) with an unexpected reason. Typically called to test that the passed-in pod is fully
   441  // terminated (reason==""), but may be called to detect if a pod did *not* terminate according to
   442  // the supplied reason.
   443  func WaitForPodTerminatedInNamespace(ctx context.Context, c clientset.Interface, podName, reason, namespace string) error {
   444  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("terminated with reason %s", reason), podStartTimeout, func(pod *v1.Pod) (bool, error) {
   445  		// Only consider Failed pods. Successful pods will be deleted and detected in
   446  		// waitForPodCondition's Get call returning `IsNotFound`
   447  		if pod.Status.Phase == v1.PodFailed {
   448  			if pod.Status.Reason == reason { // short-circuit waitForPodCondition's loop
   449  				return true, nil
   450  			}
   451  			return true, fmt.Errorf("Expected pod %q in namespace %q to be terminated with reason %q, got reason: %q", podName, namespace, reason, pod.Status.Reason)
   452  		}
   453  		return false, nil
   454  	})
   455  }
   456  
   457  // WaitForPodTerminatingInNamespaceTimeout returns if the pod is terminating, or an error if it is not after the timeout.
   458  func WaitForPodTerminatingInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   459  	return WaitForPodCondition(ctx, c, namespace, podName, "is terminating", timeout, func(pod *v1.Pod) (bool, error) {
   460  		if pod.DeletionTimestamp != nil {
   461  			return true, nil
   462  		}
   463  		return false, nil
   464  	})
   465  }
   466  
   467  // WaitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long.
   468  func WaitForPodSuccessInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   469  	return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("%s or %s", v1.PodSucceeded, v1.PodFailed), timeout, func(pod *v1.Pod) (bool, error) {
   470  		if pod.DeletionTimestamp == nil && pod.Spec.RestartPolicy == v1.RestartPolicyAlways {
   471  			return true, gomega.StopTrying(fmt.Sprintf("pod %q will never terminate with a succeeded state since its restart policy is Always", podName))
   472  		}
   473  		switch pod.Status.Phase {
   474  		case v1.PodSucceeded:
   475  			ginkgo.By("Saw pod success")
   476  			return true, nil
   477  		case v1.PodFailed:
   478  			return true, gomega.StopTrying(fmt.Sprintf("pod %q failed with status: %+v", podName, pod.Status))
   479  		default:
   480  			return false, nil
   481  		}
   482  	})
   483  }
   484  
   485  // WaitForPodNameUnschedulableInNamespace returns an error if it takes too long for the pod to become Pending
   486  // and have condition Status equal to Unschedulable,
   487  // if the pod Get api returns an error (IsNotFound or other), or if the pod failed with an unexpected reason.
   488  // Typically called to test that the passed-in pod is Pending and Unschedulable.
   489  func WaitForPodNameUnschedulableInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   490  	return WaitForPodCondition(ctx, c, namespace, podName, v1.PodReasonUnschedulable, podStartTimeout, func(pod *v1.Pod) (bool, error) {
   491  		// Only consider Failed pods. Successful pods will be deleted and detected in
   492  		// waitForPodCondition's Get call returning `IsNotFound`
   493  		if pod.Status.Phase == v1.PodPending {
   494  			for _, cond := range pod.Status.Conditions {
   495  				if cond.Type == v1.PodScheduled && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable {
   496  					return true, nil
   497  				}
   498  			}
   499  		}
   500  		if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
   501  			return true, fmt.Errorf("Expected pod %q in namespace %q to be in phase Pending, but got phase: %v", podName, namespace, pod.Status.Phase)
   502  		}
   503  		return false, nil
   504  	})
   505  }
   506  
   507  // WaitForPodNameRunningInNamespace waits default amount of time (PodStartTimeout) for the specified pod to become running.
   508  // Returns an error if timeout occurs first, or pod goes in to failed state.
   509  func WaitForPodNameRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   510  	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, podStartTimeout)
   511  }
   512  
   513  // WaitForPodRunningInNamespaceSlow waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running.
   514  // The resourceVersion is used when Watching object changes, it tells since when we care
   515  // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state.
   516  func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   517  	return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, slowPodStartTimeout)
   518  }
   519  
   520  // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running.
   521  // It does not need to exist yet when this function gets called and the pod is not expected to be recreated
   522  // when it succeeds or fails.
   523  func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   524  	return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))).
   525  		WithTimeout(timeout).
   526  		Should(BeRunningNoRetries())
   527  }
   528  
   529  // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running.
   530  // Returns an error if timeout occurs first, or pod goes in to failed state.
   531  func WaitForPodRunningInNamespace(ctx context.Context, c clientset.Interface, pod *v1.Pod) error {
   532  	if pod.Status.Phase == v1.PodRunning {
   533  		return nil
   534  	}
   535  	return WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, podStartTimeout)
   536  }
   537  
   538  // WaitTimeoutForPodNoLongerRunningInNamespace waits the given timeout duration for the specified pod to stop.
   539  func WaitTimeoutForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   540  	return WaitForPodCondition(ctx, c, namespace, podName, "completed", timeout, func(pod *v1.Pod) (bool, error) {
   541  		switch pod.Status.Phase {
   542  		case v1.PodFailed, v1.PodSucceeded:
   543  			return true, nil
   544  		}
   545  		return false, nil
   546  	})
   547  }
   548  
   549  // WaitForPodNoLongerRunningInNamespace waits default amount of time (defaultPodDeletionTimeout) for the specified pod to stop running.
   550  // Returns an error if timeout occurs first.
   551  func WaitForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error {
   552  	return WaitTimeoutForPodNoLongerRunningInNamespace(ctx, c, podName, namespace, defaultPodDeletionTimeout)
   553  }
   554  
   555  // WaitTimeoutForPodReadyInNamespace waits the given timeout duration for the
   556  // specified pod to be ready and running.
   557  func WaitTimeoutForPodReadyInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error {
   558  	return WaitForPodCondition(ctx, c, namespace, podName, "running and ready", timeout, func(pod *v1.Pod) (bool, error) {
   559  		switch pod.Status.Phase {
   560  		case v1.PodFailed, v1.PodSucceeded:
   561  			return false, gomega.StopTrying(fmt.Sprintf("The phase of Pod %s is %s which is unexpected.", pod.Name, pod.Status.Phase))
   562  		case v1.PodRunning:
   563  			return podutils.IsPodReady(pod), nil
   564  		}
   565  		return false, nil
   566  	})
   567  }
   568  
   569  // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state.
   570  // The resourceVersion is used when Watching object changes, it tells since when we care
   571  // about changes to the pod.
   572  func WaitForPodNotPending(ctx context.Context, c clientset.Interface, ns, podName string) error {
   573  	return WaitForPodCondition(ctx, c, ns, podName, "not pending", podStartTimeout, func(pod *v1.Pod) (bool, error) {
   574  		switch pod.Status.Phase {
   575  		case v1.PodPending:
   576  			return false, nil
   577  		default:
   578  			return true, nil
   579  		}
   580  	})
   581  }
   582  
   583  // WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or until podStartupTimeout.
   584  func WaitForPodSuccessInNamespace(ctx context.Context, c clientset.Interface, podName string, namespace string) error {
   585  	return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, podStartTimeout)
   586  }
   587  
   588  // WaitForPodNotFoundInNamespace returns an error if it takes too long for the pod to fully terminate.
   589  // Unlike `waitForPodTerminatedInNamespace`, the pod's Phase and Reason are ignored. If the pod Get
   590  // api returns IsNotFound then the wait stops and nil is returned. If the Get api returns an error other
   591  // than "not found" and that error is final, that error is returned and the wait stops.
   592  func WaitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, podName, ns string, timeout time.Duration) error {
   593  	err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*v1.Pod, error) {
   594  		pod, err := c.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
   595  		if apierrors.IsNotFound(err) {
   596  			return nil, nil
   597  		}
   598  		return pod, err
   599  	})).WithTimeout(timeout).Should(gomega.BeNil())
   600  	if err != nil {
   601  		return fmt.Errorf("expected pod to not be found: %w", err)
   602  	}
   603  	return nil
   604  }
   605  
   606  // WaitForPodsResponding waits for the pods to response.
   607  func WaitForPodsResponding(ctx context.Context, c clientset.Interface, ns string, controllerName string, wantName bool, timeout time.Duration, pods *v1.PodList) error {
   608  	if timeout == 0 {
   609  		timeout = podRespondingTimeout
   610  	}
   611  	ginkgo.By("trying to dial each unique pod")
   612  	label := labels.SelectorFromSet(labels.Set(map[string]string{"name": controllerName}))
   613  	options := metav1.ListOptions{LabelSelector: label.String()}
   614  
   615  	type response struct {
   616  		podName  string
   617  		response string
   618  	}
   619  
   620  	get := func(ctx context.Context) ([]response, error) {
   621  		currentPods, err := c.CoreV1().Pods(ns).List(ctx, options)
   622  		if err != nil {
   623  			return nil, fmt.Errorf("list pods: %w", err)
   624  		}
   625  
   626  		var responses []response
   627  		for _, pod := range pods.Items {
   628  			// Check that the replica list remains unchanged, otherwise we have problems.
   629  			if !isElementOf(pod.UID, currentPods) {
   630  				return nil, gomega.StopTrying(fmt.Sprintf("Pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason.\nCurrent replica set:\n%s", pod.UID, format.Object(currentPods, 1)))
   631  			}
   632  
   633  			ctxUntil, cancel := context.WithTimeout(ctx, singleCallTimeout)
   634  			defer cancel()
   635  
   636  			body, err := c.CoreV1().RESTClient().Get().
   637  				Namespace(ns).
   638  				Resource("pods").
   639  				SubResource("proxy").
   640  				Name(string(pod.Name)).
   641  				Do(ctxUntil).
   642  				Raw()
   643  
   644  			if err != nil {
   645  				// We may encounter errors here because of a race between the pod readiness and apiserver
   646  				// proxy or because of temporary failures. The error gets wrapped for framework.HandleRetry.
   647  				// Gomega+Ginkgo will handle logging.
   648  				return nil, fmt.Errorf("controller %s: failed to Get from replica pod %s:\n%w\nPod status:\n%s",
   649  					controllerName, pod.Name,
   650  					err, format.Object(pod.Status, 1))
   651  			}
   652  			responses = append(responses, response{podName: pod.Name, response: string(body)})
   653  		}
   654  		return responses, nil
   655  	}
   656  
   657  	match := func(responses []response) (func() string, error) {
   658  		// The response checker expects the pod's name unless !respondName, in
   659  		// which case it just checks for a non-empty response.
   660  		var unexpected []response
   661  		for _, response := range responses {
   662  			if wantName {
   663  				if response.response != response.podName {
   664  					unexpected = append(unexpected, response)
   665  				}
   666  			} else {
   667  				if len(response.response) == 0 {
   668  					unexpected = append(unexpected, response)
   669  				}
   670  			}
   671  		}
   672  		if len(unexpected) > 0 {
   673  			return func() string {
   674  				what := "some response"
   675  				if wantName {
   676  					what = "the pod's own name as response"
   677  				}
   678  				return fmt.Sprintf("Wanted %s, but the following pods replied with something else:\n%s", what, format.Object(unexpected, 1))
   679  			}, nil
   680  		}
   681  		return nil, nil
   682  	}
   683  
   684  	err := framework.Gomega().
   685  		Eventually(ctx, framework.HandleRetry(get)).
   686  		WithTimeout(timeout).
   687  		Should(framework.MakeMatcher(match))
   688  	if err != nil {
   689  		return fmt.Errorf("checking pod responses: %w", err)
   690  	}
   691  	return nil
   692  }
   693  
   694  func isElementOf(podUID apitypes.UID, pods *v1.PodList) bool {
   695  	for _, pod := range pods.Items {
   696  		if pod.UID == podUID {
   697  			return true
   698  		}
   699  	}
   700  	return false
   701  }
   702  
   703  // WaitForNumberOfPods waits up to timeout to ensure there are exact
   704  // `num` pods in namespace `ns`.
   705  // It returns the matching Pods or a timeout error.
   706  func WaitForNumberOfPods(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   707  	return WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, podScheduledBeforeTimeout, "exist", func(pod *v1.Pod) bool {
   708  		return true
   709  	})
   710  }
   711  
   712  // WaitForPodsWithLabelScheduled waits for all matching pods to become scheduled and at least one
   713  // matching pod exists.  Return the list of matching pods.
   714  func WaitForPodsWithLabelScheduled(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (pods *v1.PodList, err error) {
   715  	opts := metav1.ListOptions{LabelSelector: label.String()}
   716  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1, AllMatching: true}, podScheduledBeforeTimeout, "be scheduled", func(pod *v1.Pod) bool {
   717  		return pod.Spec.NodeName != ""
   718  	})
   719  }
   720  
   721  // WaitForPodsWithLabel waits up to podListTimeout for getting pods with certain label
   722  func WaitForPodsWithLabel(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (*v1.PodList, error) {
   723  	opts := metav1.ListOptions{LabelSelector: label.String()}
   724  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1}, podListTimeout, "exist", func(pod *v1.Pod) bool {
   725  		return true
   726  	})
   727  }
   728  
   729  // WaitForPodsWithLabelRunningReady waits for exact amount of matching pods to become running and ready.
   730  // Return the list of matching pods.
   731  func WaitForPodsWithLabelRunningReady(ctx context.Context, c clientset.Interface, ns string, label labels.Selector, num int, timeout time.Duration) (pods *v1.PodList, err error) {
   732  	opts := metav1.ListOptions{LabelSelector: label.String()}
   733  	return WaitForPods(ctx, c, ns, opts, Range{MinFound: num, AllMatching: true}, timeout, "be running and ready", RunningReady)
   734  }
   735  
   736  // WaitForNRestartablePods tries to list restarting pods using ps until it finds expect of them,
   737  // returning their names if it can do so before timeout.
   738  func WaitForNRestartablePods(ctx context.Context, ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) {
   739  	var pods []*v1.Pod
   740  
   741  	get := func(ctx context.Context) ([]*v1.Pod, error) {
   742  		return ps.List(), nil
   743  	}
   744  
   745  	match := func(allPods []*v1.Pod) (func() string, error) {
   746  		pods = FilterNonRestartablePods(allPods)
   747  		if len(pods) != expect {
   748  			return func() string {
   749  				return fmt.Sprintf("expected to find non-restartable %d pods, but found %d:\n%s", expect, len(pods), format.Object(pods, 1))
   750  			}, nil
   751  		}
   752  		return nil, nil
   753  	}
   754  
   755  	err := framework.Gomega().
   756  		Eventually(ctx, framework.HandleRetry(get)).
   757  		WithTimeout(timeout).
   758  		Should(framework.MakeMatcher(match))
   759  	if err != nil {
   760  		return nil, err
   761  	}
   762  
   763  	podNames := make([]string, len(pods))
   764  	for i, p := range pods {
   765  		podNames[i] = p.Name
   766  	}
   767  	return podNames, nil
   768  }
   769  
   770  // WaitForPodContainerToFail waits for the given Pod container to fail with the given reason, specifically due to
   771  // invalid container configuration. In this case, the container will remain in a waiting state with a specific
   772  // reason set, which should match the given reason.
   773  func WaitForPodContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error {
   774  	conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason)
   775  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   776  		switch pod.Status.Phase {
   777  		case v1.PodPending:
   778  			if len(pod.Status.ContainerStatuses) == 0 {
   779  				return false, nil
   780  			}
   781  			containerStatus := pod.Status.ContainerStatuses[containerIndex]
   782  			if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason {
   783  				return true, nil
   784  			}
   785  			return false, nil
   786  		case v1.PodFailed, v1.PodRunning, v1.PodSucceeded:
   787  			return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase)
   788  		}
   789  		return false, nil
   790  	})
   791  }
   792  
   793  // WaitForPodScheduled waits for the pod to be schedule, ie. the .spec.nodeName is set
   794  func WaitForPodScheduled(ctx context.Context, c clientset.Interface, namespace, podName string) error {
   795  	return WaitForPodCondition(ctx, c, namespace, podName, "pod is scheduled", podScheduledBeforeTimeout, func(pod *v1.Pod) (bool, error) {
   796  		return pod.Spec.NodeName != "", nil
   797  	})
   798  }
   799  
   800  // WaitForPodContainerStarted waits for the given Pod container to start, after a successful run of the startupProbe.
   801  func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, timeout time.Duration) error {
   802  	conditionDesc := fmt.Sprintf("container %d started", containerIndex)
   803  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   804  		if containerIndex > len(pod.Status.ContainerStatuses)-1 {
   805  			return false, nil
   806  		}
   807  		containerStatus := pod.Status.ContainerStatuses[containerIndex]
   808  		return *containerStatus.Started, nil
   809  	})
   810  }
   811  
   812  // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden".
   813  func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error {
   814  	conditionDesc := fmt.Sprintf("failed with reason %s", reason)
   815  	return WaitForPodCondition(ctx, c, pod.Namespace, pod.Name, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   816  		switch pod.Status.Phase {
   817  		case v1.PodSucceeded:
   818  			return true, errors.New("pod succeeded unexpectedly")
   819  		case v1.PodFailed:
   820  			if pod.Status.Reason == reason {
   821  				return true, nil
   822  			} else {
   823  				return true, fmt.Errorf("pod failed with reason %s", pod.Status.Reason)
   824  			}
   825  		}
   826  		return false, nil
   827  	})
   828  }
   829  
   830  // WaitForContainerRunning waits for the given Pod container to have a state of running
   831  func WaitForContainerRunning(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error {
   832  	conditionDesc := fmt.Sprintf("container %s running", containerName)
   833  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   834  		for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
   835  			for _, cs := range statuses {
   836  				if cs.Name == containerName {
   837  					return cs.State.Running != nil, nil
   838  				}
   839  			}
   840  		}
   841  		return false, nil
   842  	})
   843  }
   844  
   845  // WaitForContainerTerminated waits for the given Pod container to have a state of terminated
   846  func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error {
   847  	conditionDesc := fmt.Sprintf("container %s terminated", containerName)
   848  	return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
   849  		for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} {
   850  			for _, cs := range statuses {
   851  				if cs.Name == containerName {
   852  					return cs.State.Terminated != nil, nil
   853  				}
   854  			}
   855  		}
   856  		return false, nil
   857  	})
   858  }