k8s.io/kubernetes@v1.29.3/test/e2e/framework/pod/wait.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pod 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "reflect" 24 "strings" 25 "time" 26 27 "github.com/onsi/ginkgo/v2" 28 "github.com/onsi/gomega" 29 "github.com/onsi/gomega/gcustom" 30 "github.com/onsi/gomega/types" 31 32 appsv1 "k8s.io/api/apps/v1" 33 v1 "k8s.io/api/core/v1" 34 apierrors "k8s.io/apimachinery/pkg/api/errors" 35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 "k8s.io/apimachinery/pkg/labels" 37 apitypes "k8s.io/apimachinery/pkg/types" 38 clientset "k8s.io/client-go/kubernetes" 39 "k8s.io/kubectl/pkg/util/podutils" 40 "k8s.io/kubernetes/test/e2e/framework" 41 testutils "k8s.io/kubernetes/test/utils" 42 "k8s.io/kubernetes/test/utils/format" 43 ) 44 45 const ( 46 // defaultPodDeletionTimeout is the default timeout for deleting pod. 47 defaultPodDeletionTimeout = 3 * time.Minute 48 49 // podListTimeout is how long to wait for the pod to be listable. 50 podListTimeout = time.Minute 51 52 podRespondingTimeout = 15 * time.Minute 53 54 // How long pods have to become scheduled onto nodes 55 podScheduledBeforeTimeout = podListTimeout + (20 * time.Second) 56 57 // podStartTimeout is how long to wait for the pod to be started. 58 podStartTimeout = 5 * time.Minute 59 60 // singleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent 61 // transient failures from failing tests. 62 singleCallTimeout = 5 * time.Minute 63 64 // Some pods can take much longer to get ready due to volume attach/detach latency. 65 slowPodStartTimeout = 15 * time.Minute 66 ) 67 68 type podCondition func(pod *v1.Pod) (bool, error) 69 70 // BeRunningNoRetries verifies that a pod starts running. It's a permanent 71 // failure when the pod enters some other permanent phase. 72 func BeRunningNoRetries() types.GomegaMatcher { 73 return gomega.And( 74 // This additional matcher checks for the final error condition. 75 gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) { 76 switch pod.Status.Phase { 77 case v1.PodFailed, v1.PodSucceeded: 78 return false, gomega.StopTrying(fmt.Sprintf("Expected pod to reach phase %q, got final phase %q instead.", v1.PodRunning, pod.Status.Phase)) 79 default: 80 return true, nil 81 } 82 }), 83 BeInPhase(v1.PodRunning), 84 ) 85 } 86 87 // BeInPhase matches if pod.status.phase is the expected phase. 88 func BeInPhase(phase v1.PodPhase) types.GomegaMatcher { 89 // A simple implementation of this would be: 90 // return gomega.HaveField("Status.Phase", phase) 91 // 92 // But that produces a fairly generic 93 // Value for field 'Status.Phase' failed to satisfy matcher. 94 // failure message and doesn't show the pod. We can do better than 95 // that with a custom matcher. 96 97 return gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) { 98 return pod.Status.Phase == phase, nil 99 }).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase) 100 } 101 102 // WaitForPodsRunningReady waits up to timeout to ensure that all pods in 103 // namespace ns are either running and ready, or failed but controlled by a 104 // controller. Also, it ensures that at least minPods are running and 105 // ready. It has separate behavior from other 'wait for' pods functions in 106 // that it requests the list of pods on every iteration. This is useful, for 107 // example, in cluster startup, because the number of pods increases while 108 // waiting. All pods that are in SUCCESS state are not counted. 109 // 110 // If minPods or allowedNotReadyPods are -1, this method returns immediately 111 // without waiting. 112 func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration) error { 113 if minPods == -1 || allowedNotReadyPods == -1 { 114 return nil 115 } 116 117 // We get the new list of pods, replication controllers, and replica 118 // sets in every iteration because more pods come online during startup 119 // and we want to ensure they are also checked. 120 // 121 // This struct gets populated while polling, then gets checked, and in 122 // case of a timeout is included in the failure message. 123 type state struct { 124 ReplicationControllers []v1.ReplicationController 125 ReplicaSets []appsv1.ReplicaSet 126 Pods []v1.Pod 127 } 128 129 // notReady is -1 for any failure other than a timeout. 130 // Otherwise it is the number of pods that we were still 131 // waiting for. 132 notReady := int32(-1) 133 134 err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) { 135 // Reset notReady at the start of a poll attempt. 136 notReady = -1 137 138 rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{}) 139 if err != nil { 140 return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err) 141 } 142 rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{}) 143 if err != nil { 144 return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err) 145 } 146 podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{}) 147 if err != nil { 148 return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err) 149 } 150 return &state{ 151 ReplicationControllers: rcList.Items, 152 ReplicaSets: rsList.Items, 153 Pods: podList.Items, 154 }, nil 155 })).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) { 156 replicas, replicaOk := int32(0), int32(0) 157 for _, rc := range s.ReplicationControllers { 158 replicas += *rc.Spec.Replicas 159 replicaOk += rc.Status.ReadyReplicas 160 } 161 for _, rs := range s.ReplicaSets { 162 replicas += *rs.Spec.Replicas 163 replicaOk += rs.Status.ReadyReplicas 164 } 165 166 nOk := int32(0) 167 notReady = int32(0) 168 failedPods := []v1.Pod{} 169 otherPods := []v1.Pod{} 170 succeededPods := []string{} 171 for _, pod := range s.Pods { 172 res, err := testutils.PodRunningReady(&pod) 173 switch { 174 case res && err == nil: 175 nOk++ 176 case pod.Status.Phase == v1.PodSucceeded: 177 // it doesn't make sense to wait for this pod 178 succeededPods = append(succeededPods, pod.Name) 179 case pod.Status.Phase == v1.PodFailed: 180 // ignore failed pods that are controlled by some controller 181 if metav1.GetControllerOf(&pod) == nil { 182 failedPods = append(failedPods, pod) 183 } 184 default: 185 notReady++ 186 otherPods = append(otherPods, pod) 187 } 188 } 189 done := replicaOk == replicas && nOk >= minPods && (len(failedPods)+len(otherPods)) == 0 190 if done { 191 return nil, nil 192 } 193 194 // Delayed formatting of a failure message. 195 return func() string { 196 var buffer strings.Builder 197 buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods)) 198 buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods))) 199 buffer.WriteString(fmt.Sprintf("Expected %d pod replicas, %d are Running and Ready.\n", replicas, replicaOk)) 200 if len(succeededPods) > 0 { 201 buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1))) 202 } 203 if len(failedPods) > 0 { 204 buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(failedPods, 1))) 205 } 206 if len(otherPods) > 0 { 207 buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1))) 208 } 209 return buffer.String() 210 }, nil 211 })) 212 213 // An error might not be fatal. 214 if err != nil && notReady >= 0 && notReady <= allowedNotReadyPods { 215 framework.Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods) 216 return nil 217 } 218 return err 219 } 220 221 // WaitForPodCondition waits a pods to be matched to the given condition. 222 // The condition callback may use gomega.StopTrying to abort early. 223 func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error { 224 return framework.Gomega(). 225 Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(ns).Get, podName, metav1.GetOptions{}))). 226 WithTimeout(timeout). 227 Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) { 228 done, err := condition(pod) 229 if err != nil { 230 return nil, err 231 } 232 if done { 233 return nil, nil 234 } 235 return func() string { 236 return fmt.Sprintf("expected pod to be %s, got instead:\n%s", conditionDesc, format.Object(pod, 1)) 237 }, nil 238 })) 239 } 240 241 // Range determines how many items must exist and how many must match a certain 242 // condition. Values <= 0 are ignored. 243 // TODO (?): move to test/e2e/framework/range 244 type Range struct { 245 // MinMatching must be <= actual matching items or <= 0. 246 MinMatching int 247 // MaxMatching must be >= actual matching items or <= 0. 248 // To check for "no matching items", set NonMatching. 249 MaxMatching int 250 // NoneMatching indicates that no item must match. 251 NoneMatching bool 252 // AllMatching indicates that all items must match. 253 AllMatching bool 254 // MinFound must be <= existing items or <= 0. 255 MinFound int 256 } 257 258 // Min returns how many items must exist. 259 func (r Range) Min() int { 260 min := r.MinMatching 261 if min < r.MinFound { 262 min = r.MinFound 263 } 264 return min 265 } 266 267 // WaitForPods waits for pods in the given namespace to match the given 268 // condition. How many pods must exist and how many must match the condition 269 // is determined by the range parameter. The condition callback may use 270 // gomega.StopTrying(...).Now() to abort early. The condition description 271 // will be used with "expected pods to <description>". 272 func WaitForPods(ctx context.Context, c clientset.Interface, ns string, opts metav1.ListOptions, r Range, timeout time.Duration, conditionDesc string, condition func(*v1.Pod) bool) (*v1.PodList, error) { 273 var finalPods *v1.PodList 274 minPods := r.Min() 275 match := func(pods *v1.PodList) (func() string, error) { 276 finalPods = pods 277 278 if len(pods.Items) < minPods { 279 return func() string { 280 return fmt.Sprintf("expected at least %d pods, only got %d", minPods, len(pods.Items)) 281 }, nil 282 } 283 284 var nonMatchingPods, matchingPods []v1.Pod 285 for _, pod := range pods.Items { 286 if condition(&pod) { 287 matchingPods = append(matchingPods, pod) 288 } else { 289 nonMatchingPods = append(nonMatchingPods, pod) 290 } 291 } 292 matching := len(pods.Items) - len(nonMatchingPods) 293 if matching < r.MinMatching && r.MinMatching > 0 { 294 return func() string { 295 return fmt.Sprintf("expected at least %d pods to %s, %d out of %d were not:\n%s", 296 r.MinMatching, conditionDesc, len(nonMatchingPods), len(pods.Items), 297 format.Object(nonMatchingPods, 1)) 298 }, nil 299 } 300 if len(nonMatchingPods) > 0 && r.AllMatching { 301 return func() string { 302 return fmt.Sprintf("expected all pods to %s, %d out of %d were not:\n%s", 303 conditionDesc, len(nonMatchingPods), len(pods.Items), 304 format.Object(nonMatchingPods, 1)) 305 }, nil 306 } 307 if matching > r.MaxMatching && r.MaxMatching > 0 { 308 return func() string { 309 return fmt.Sprintf("expected at most %d pods to %s, %d out of %d were:\n%s", 310 r.MinMatching, conditionDesc, len(matchingPods), len(pods.Items), 311 format.Object(matchingPods, 1)) 312 }, nil 313 } 314 if matching > 0 && r.NoneMatching { 315 return func() string { 316 return fmt.Sprintf("expected no pods to %s, %d out of %d were:\n%s", 317 conditionDesc, len(matchingPods), len(pods.Items), 318 format.Object(matchingPods, 1)) 319 }, nil 320 } 321 return nil, nil 322 } 323 324 err := framework.Gomega(). 325 Eventually(ctx, framework.ListObjects(c.CoreV1().Pods(ns).List, opts)). 326 WithTimeout(timeout). 327 Should(framework.MakeMatcher(match)) 328 return finalPods, err 329 } 330 331 // RunningReady checks whether pod p's phase is running and it has a ready 332 // condition of status true. 333 func RunningReady(p *v1.Pod) bool { 334 return p.Status.Phase == v1.PodRunning && podutils.IsPodReady(p) 335 } 336 337 // WaitForPodsRunning waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` are running. 338 func WaitForPodsRunning(c clientset.Interface, ns string, num int, timeout time.Duration) error { 339 _, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 340 "be running and ready", func(pod *v1.Pod) bool { 341 ready, _ := testutils.PodRunningReady(pod) 342 return ready 343 }) 344 return err 345 } 346 347 // WaitForPodsSchedulingGated waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` stay in scheduling gated state. 348 func WaitForPodsSchedulingGated(c clientset.Interface, ns string, num int, timeout time.Duration) error { 349 _, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 350 "be in scheduling gated state", func(pod *v1.Pod) bool { 351 for _, condition := range pod.Status.Conditions { 352 if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonSchedulingGated { 353 return true 354 } 355 } 356 return false 357 }) 358 return err 359 } 360 361 // WaitForPodsWithSchedulingGates waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` 362 // match the given `schedulingGates`stay in scheduling gated state. 363 func WaitForPodsWithSchedulingGates(c clientset.Interface, ns string, num int, timeout time.Duration, schedulingGates []v1.PodSchedulingGate) error { 364 _, err := WaitForPods(context.TODO(), c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 365 "have certain scheduling gates", func(pod *v1.Pod) bool { 366 return reflect.DeepEqual(pod.Spec.SchedulingGates, schedulingGates) 367 }) 368 return err 369 } 370 371 // WaitForPodTerminatedInNamespace returns an error if it takes too long for the pod to terminate, 372 // if the pod Get api returns an error (IsNotFound or other), or if the pod failed (and thus did not 373 // terminate) with an unexpected reason. Typically called to test that the passed-in pod is fully 374 // terminated (reason==""), but may be called to detect if a pod did *not* terminate according to 375 // the supplied reason. 376 func WaitForPodTerminatedInNamespace(ctx context.Context, c clientset.Interface, podName, reason, namespace string) error { 377 return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("terminated with reason %s", reason), podStartTimeout, func(pod *v1.Pod) (bool, error) { 378 // Only consider Failed pods. Successful pods will be deleted and detected in 379 // waitForPodCondition's Get call returning `IsNotFound` 380 if pod.Status.Phase == v1.PodFailed { 381 if pod.Status.Reason == reason { // short-circuit waitForPodCondition's loop 382 return true, nil 383 } 384 return true, fmt.Errorf("Expected pod %q in namespace %q to be terminated with reason %q, got reason: %q", podName, namespace, reason, pod.Status.Reason) 385 } 386 return false, nil 387 }) 388 } 389 390 // WaitForPodTerminatingInNamespaceTimeout returns if the pod is terminating, or an error if it is not after the timeout. 391 func WaitForPodTerminatingInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 392 return WaitForPodCondition(ctx, c, namespace, podName, "is terminating", timeout, func(pod *v1.Pod) (bool, error) { 393 if pod.DeletionTimestamp != nil { 394 return true, nil 395 } 396 return false, nil 397 }) 398 } 399 400 // WaitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long. 401 func WaitForPodSuccessInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 402 return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("%s or %s", v1.PodSucceeded, v1.PodFailed), timeout, func(pod *v1.Pod) (bool, error) { 403 if pod.DeletionTimestamp == nil && pod.Spec.RestartPolicy == v1.RestartPolicyAlways { 404 return true, fmt.Errorf("pod %q will never terminate with a succeeded state since its restart policy is Always", podName) 405 } 406 switch pod.Status.Phase { 407 case v1.PodSucceeded: 408 ginkgo.By("Saw pod success") 409 return true, nil 410 case v1.PodFailed: 411 return true, fmt.Errorf("pod %q failed with status: %+v", podName, pod.Status) 412 default: 413 return false, nil 414 } 415 }) 416 } 417 418 // WaitForPodNameUnschedulableInNamespace returns an error if it takes too long for the pod to become Pending 419 // and have condition Status equal to Unschedulable, 420 // if the pod Get api returns an error (IsNotFound or other), or if the pod failed with an unexpected reason. 421 // Typically called to test that the passed-in pod is Pending and Unschedulable. 422 func WaitForPodNameUnschedulableInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 423 return WaitForPodCondition(ctx, c, namespace, podName, v1.PodReasonUnschedulable, podStartTimeout, func(pod *v1.Pod) (bool, error) { 424 // Only consider Failed pods. Successful pods will be deleted and detected in 425 // waitForPodCondition's Get call returning `IsNotFound` 426 if pod.Status.Phase == v1.PodPending { 427 for _, cond := range pod.Status.Conditions { 428 if cond.Type == v1.PodScheduled && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable { 429 return true, nil 430 } 431 } 432 } 433 if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { 434 return true, fmt.Errorf("Expected pod %q in namespace %q to be in phase Pending, but got phase: %v", podName, namespace, pod.Status.Phase) 435 } 436 return false, nil 437 }) 438 } 439 440 // WaitForPodNameRunningInNamespace waits default amount of time (PodStartTimeout) for the specified pod to become running. 441 // Returns an error if timeout occurs first, or pod goes in to failed state. 442 func WaitForPodNameRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 443 return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, podStartTimeout) 444 } 445 446 // WaitForPodRunningInNamespaceSlow waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running. 447 // The resourceVersion is used when Watching object changes, it tells since when we care 448 // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state. 449 func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface, podName, namespace string) error { 450 return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, slowPodStartTimeout) 451 } 452 453 // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running. 454 // It does not need to exist yet when this function gets called and the pod is not expected to be recreated 455 // when it succeeds or fails. 456 func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 457 return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))). 458 WithTimeout(timeout). 459 Should(BeRunningNoRetries()) 460 } 461 462 // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running. 463 // Returns an error if timeout occurs first, or pod goes in to failed state. 464 func WaitForPodRunningInNamespace(ctx context.Context, c clientset.Interface, pod *v1.Pod) error { 465 if pod.Status.Phase == v1.PodRunning { 466 return nil 467 } 468 return WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, podStartTimeout) 469 } 470 471 // WaitTimeoutForPodNoLongerRunningInNamespace waits the given timeout duration for the specified pod to stop. 472 func WaitTimeoutForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 473 return WaitForPodCondition(ctx, c, namespace, podName, "completed", timeout, func(pod *v1.Pod) (bool, error) { 474 switch pod.Status.Phase { 475 case v1.PodFailed, v1.PodSucceeded: 476 return true, nil 477 } 478 return false, nil 479 }) 480 } 481 482 // WaitForPodNoLongerRunningInNamespace waits default amount of time (defaultPodDeletionTimeout) for the specified pod to stop running. 483 // Returns an error if timeout occurs first. 484 func WaitForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 485 return WaitTimeoutForPodNoLongerRunningInNamespace(ctx, c, podName, namespace, defaultPodDeletionTimeout) 486 } 487 488 // WaitTimeoutForPodReadyInNamespace waits the given timeout duration for the 489 // specified pod to be ready and running. 490 func WaitTimeoutForPodReadyInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 491 return WaitForPodCondition(ctx, c, namespace, podName, "running and ready", timeout, func(pod *v1.Pod) (bool, error) { 492 switch pod.Status.Phase { 493 case v1.PodFailed, v1.PodSucceeded: 494 return false, gomega.StopTrying(fmt.Sprintf("The phase of Pod %s is %s which is unexpected.", pod.Name, pod.Status.Phase)) 495 case v1.PodRunning: 496 return podutils.IsPodReady(pod), nil 497 } 498 return false, nil 499 }) 500 } 501 502 // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state. 503 // The resourceVersion is used when Watching object changes, it tells since when we care 504 // about changes to the pod. 505 func WaitForPodNotPending(ctx context.Context, c clientset.Interface, ns, podName string) error { 506 return WaitForPodCondition(ctx, c, ns, podName, "not pending", podStartTimeout, func(pod *v1.Pod) (bool, error) { 507 switch pod.Status.Phase { 508 case v1.PodPending: 509 return false, nil 510 default: 511 return true, nil 512 } 513 }) 514 } 515 516 // WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or until podStartupTimeout. 517 func WaitForPodSuccessInNamespace(ctx context.Context, c clientset.Interface, podName string, namespace string) error { 518 return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, podStartTimeout) 519 } 520 521 // WaitForPodSuccessInNamespaceSlow returns nil if the pod reached state success, or an error if it reached failure or until slowPodStartupTimeout. 522 func WaitForPodSuccessInNamespaceSlow(ctx context.Context, c clientset.Interface, podName string, namespace string) error { 523 return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, slowPodStartTimeout) 524 } 525 526 // WaitForPodNotFoundInNamespace returns an error if it takes too long for the pod to fully terminate. 527 // Unlike `waitForPodTerminatedInNamespace`, the pod's Phase and Reason are ignored. If the pod Get 528 // api returns IsNotFound then the wait stops and nil is returned. If the Get api returns an error other 529 // than "not found" and that error is final, that error is returned and the wait stops. 530 func WaitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, podName, ns string, timeout time.Duration) error { 531 err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*v1.Pod, error) { 532 pod, err := c.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{}) 533 if apierrors.IsNotFound(err) { 534 return nil, nil 535 } 536 return pod, err 537 })).WithTimeout(timeout).Should(gomega.BeNil()) 538 if err != nil { 539 return fmt.Errorf("expected pod to not be found: %w", err) 540 } 541 return nil 542 } 543 544 // WaitForPodsResponding waits for the pods to response. 545 func WaitForPodsResponding(ctx context.Context, c clientset.Interface, ns string, controllerName string, wantName bool, timeout time.Duration, pods *v1.PodList) error { 546 if timeout == 0 { 547 timeout = podRespondingTimeout 548 } 549 ginkgo.By("trying to dial each unique pod") 550 label := labels.SelectorFromSet(labels.Set(map[string]string{"name": controllerName})) 551 options := metav1.ListOptions{LabelSelector: label.String()} 552 553 type response struct { 554 podName string 555 response string 556 } 557 558 get := func(ctx context.Context) ([]response, error) { 559 currentPods, err := c.CoreV1().Pods(ns).List(ctx, options) 560 if err != nil { 561 return nil, fmt.Errorf("list pods: %w", err) 562 } 563 564 var responses []response 565 for _, pod := range pods.Items { 566 // Check that the replica list remains unchanged, otherwise we have problems. 567 if !isElementOf(pod.UID, currentPods) { 568 return nil, gomega.StopTrying(fmt.Sprintf("Pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason.\nCurrent replica set:\n%s", pod.UID, format.Object(currentPods, 1))) 569 } 570 571 ctxUntil, cancel := context.WithTimeout(ctx, singleCallTimeout) 572 defer cancel() 573 574 body, err := c.CoreV1().RESTClient().Get(). 575 Namespace(ns). 576 Resource("pods"). 577 SubResource("proxy"). 578 Name(string(pod.Name)). 579 Do(ctxUntil). 580 Raw() 581 582 if err != nil { 583 // We may encounter errors here because of a race between the pod readiness and apiserver 584 // proxy or because of temporary failures. The error gets wrapped for framework.HandleRetry. 585 // Gomega+Ginkgo will handle logging. 586 return nil, fmt.Errorf("controller %s: failed to Get from replica pod %s:\n%w\nPod status:\n%s", 587 controllerName, pod.Name, 588 err, format.Object(pod.Status, 1)) 589 } 590 responses = append(responses, response{podName: pod.Name, response: string(body)}) 591 } 592 return responses, nil 593 } 594 595 match := func(responses []response) (func() string, error) { 596 // The response checker expects the pod's name unless !respondName, in 597 // which case it just checks for a non-empty response. 598 var unexpected []response 599 for _, response := range responses { 600 if wantName { 601 if response.response != response.podName { 602 unexpected = append(unexpected, response) 603 } 604 } else { 605 if len(response.response) == 0 { 606 unexpected = append(unexpected, response) 607 } 608 } 609 } 610 if len(unexpected) > 0 { 611 return func() string { 612 what := "some response" 613 if wantName { 614 what = "the pod's own name as response" 615 } 616 return fmt.Sprintf("Wanted %s, but the following pods replied with something else:\n%s", what, format.Object(unexpected, 1)) 617 }, nil 618 } 619 return nil, nil 620 } 621 622 err := framework.Gomega(). 623 Eventually(ctx, framework.HandleRetry(get)). 624 WithTimeout(timeout). 625 Should(framework.MakeMatcher(match)) 626 if err != nil { 627 return fmt.Errorf("checking pod responses: %w", err) 628 } 629 return nil 630 } 631 632 func isElementOf(podUID apitypes.UID, pods *v1.PodList) bool { 633 for _, pod := range pods.Items { 634 if pod.UID == podUID { 635 return true 636 } 637 } 638 return false 639 } 640 641 // WaitForNumberOfPods waits up to timeout to ensure there are exact 642 // `num` pods in namespace `ns`. 643 // It returns the matching Pods or a timeout error. 644 func WaitForNumberOfPods(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) (pods *v1.PodList, err error) { 645 return WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, podScheduledBeforeTimeout, "exist", func(pod *v1.Pod) bool { 646 return true 647 }) 648 } 649 650 // WaitForPodsWithLabelScheduled waits for all matching pods to become scheduled and at least one 651 // matching pod exists. Return the list of matching pods. 652 func WaitForPodsWithLabelScheduled(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (pods *v1.PodList, err error) { 653 opts := metav1.ListOptions{LabelSelector: label.String()} 654 return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1, AllMatching: true}, podScheduledBeforeTimeout, "be scheduled", func(pod *v1.Pod) bool { 655 return pod.Spec.NodeName != "" 656 }) 657 } 658 659 // WaitForPodsWithLabel waits up to podListTimeout for getting pods with certain label 660 func WaitForPodsWithLabel(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (*v1.PodList, error) { 661 opts := metav1.ListOptions{LabelSelector: label.String()} 662 return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1}, podListTimeout, "exist", func(pod *v1.Pod) bool { 663 return true 664 }) 665 } 666 667 // WaitForPodsWithLabelRunningReady waits for exact amount of matching pods to become running and ready. 668 // Return the list of matching pods. 669 func WaitForPodsWithLabelRunningReady(ctx context.Context, c clientset.Interface, ns string, label labels.Selector, num int, timeout time.Duration) (pods *v1.PodList, err error) { 670 opts := metav1.ListOptions{LabelSelector: label.String()} 671 return WaitForPods(ctx, c, ns, opts, Range{MinFound: num, AllMatching: true}, timeout, "be running and ready", RunningReady) 672 } 673 674 // WaitForNRestartablePods tries to list restarting pods using ps until it finds expect of them, 675 // returning their names if it can do so before timeout. 676 func WaitForNRestartablePods(ctx context.Context, ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) { 677 var pods []*v1.Pod 678 679 get := func(ctx context.Context) ([]*v1.Pod, error) { 680 return ps.List(), nil 681 } 682 683 match := func(allPods []*v1.Pod) (func() string, error) { 684 pods = FilterNonRestartablePods(allPods) 685 if len(pods) != expect { 686 return func() string { 687 return fmt.Sprintf("expected to find non-restartable %d pods, but found %d:\n%s", expect, len(pods), format.Object(pods, 1)) 688 }, nil 689 } 690 return nil, nil 691 } 692 693 err := framework.Gomega(). 694 Eventually(ctx, framework.HandleRetry(get)). 695 WithTimeout(timeout). 696 Should(framework.MakeMatcher(match)) 697 if err != nil { 698 return nil, err 699 } 700 701 podNames := make([]string, len(pods)) 702 for i, p := range pods { 703 podNames[i] = p.Name 704 } 705 return podNames, nil 706 } 707 708 // WaitForPodContainerToFail waits for the given Pod container to fail with the given reason, specifically due to 709 // invalid container configuration. In this case, the container will remain in a waiting state with a specific 710 // reason set, which should match the given reason. 711 func WaitForPodContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error { 712 conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason) 713 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 714 switch pod.Status.Phase { 715 case v1.PodPending: 716 if len(pod.Status.ContainerStatuses) == 0 { 717 return false, nil 718 } 719 containerStatus := pod.Status.ContainerStatuses[containerIndex] 720 if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason { 721 return true, nil 722 } 723 return false, nil 724 case v1.PodFailed, v1.PodRunning, v1.PodSucceeded: 725 return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase) 726 } 727 return false, nil 728 }) 729 } 730 731 // WaitForPodScheduled waits for the pod to be schedule, ie. the .spec.nodeName is set 732 func WaitForPodScheduled(ctx context.Context, c clientset.Interface, namespace, podName string) error { 733 return WaitForPodCondition(ctx, c, namespace, podName, "pod is scheduled", podScheduledBeforeTimeout, func(pod *v1.Pod) (bool, error) { 734 return pod.Spec.NodeName != "", nil 735 }) 736 } 737 738 // WaitForPodContainerStarted waits for the given Pod container to start, after a successful run of the startupProbe. 739 func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, timeout time.Duration) error { 740 conditionDesc := fmt.Sprintf("container %d started", containerIndex) 741 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 742 if containerIndex > len(pod.Status.ContainerStatuses)-1 { 743 return false, nil 744 } 745 containerStatus := pod.Status.ContainerStatuses[containerIndex] 746 return *containerStatus.Started, nil 747 }) 748 } 749 750 // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden". 751 func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error { 752 conditionDesc := fmt.Sprintf("failed with reason %s", reason) 753 return WaitForPodCondition(ctx, c, pod.Namespace, pod.Name, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 754 switch pod.Status.Phase { 755 case v1.PodSucceeded: 756 return true, errors.New("pod succeeded unexpectedly") 757 case v1.PodFailed: 758 if pod.Status.Reason == reason { 759 return true, nil 760 } else { 761 return true, fmt.Errorf("pod failed with reason %s", pod.Status.Reason) 762 } 763 } 764 return false, nil 765 }) 766 } 767 768 // WaitForContainerRunning waits for the given Pod container to have a state of running 769 func WaitForContainerRunning(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error { 770 conditionDesc := fmt.Sprintf("container %s running", containerName) 771 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 772 for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { 773 for _, cs := range statuses { 774 if cs.Name == containerName { 775 return cs.State.Running != nil, nil 776 } 777 } 778 } 779 return false, nil 780 }) 781 }