k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/framework/pod/wait.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pod 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "reflect" 24 "strings" 25 "time" 26 27 "github.com/onsi/ginkgo/v2" 28 "github.com/onsi/gomega" 29 "github.com/onsi/gomega/gcustom" 30 "github.com/onsi/gomega/types" 31 32 appsv1 "k8s.io/api/apps/v1" 33 v1 "k8s.io/api/core/v1" 34 apierrors "k8s.io/apimachinery/pkg/api/errors" 35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 "k8s.io/apimachinery/pkg/labels" 37 apitypes "k8s.io/apimachinery/pkg/types" 38 clientset "k8s.io/client-go/kubernetes" 39 "k8s.io/kubectl/pkg/util/podutils" 40 "k8s.io/kubernetes/test/e2e/framework" 41 testutils "k8s.io/kubernetes/test/utils" 42 "k8s.io/kubernetes/test/utils/format" 43 ) 44 45 const ( 46 // defaultPodDeletionTimeout is the default timeout for deleting pod. 47 defaultPodDeletionTimeout = 3 * time.Minute 48 49 // podListTimeout is how long to wait for the pod to be listable. 50 podListTimeout = time.Minute 51 52 podRespondingTimeout = 15 * time.Minute 53 54 // How long pods have to become scheduled onto nodes 55 podScheduledBeforeTimeout = podListTimeout + (20 * time.Second) 56 57 // podStartTimeout is how long to wait for the pod to be started. 58 podStartTimeout = 5 * time.Minute 59 60 // singleCallTimeout is how long to try single API calls (like 'get' or 'list'). Used to prevent 61 // transient failures from failing tests. 62 singleCallTimeout = 5 * time.Minute 63 64 // Some pods can take much longer to get ready due to volume attach/detach latency. 65 slowPodStartTimeout = 15 * time.Minute 66 ) 67 68 type podCondition func(pod *v1.Pod) (bool, error) 69 70 // BeRunningNoRetries verifies that a pod starts running. It's a permanent 71 // failure when the pod enters some other permanent phase. 72 func BeRunningNoRetries() types.GomegaMatcher { 73 return gomega.And( 74 // This additional matcher checks for the final error condition. 75 gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) { 76 switch pod.Status.Phase { 77 case v1.PodFailed, v1.PodSucceeded: 78 return false, gomega.StopTrying(fmt.Sprintf("Expected pod to reach phase %q, got final phase %q instead:\n%s", v1.PodRunning, pod.Status.Phase, format.Object(pod, 1))) 79 default: 80 return true, nil 81 } 82 }), 83 BeInPhase(v1.PodRunning), 84 ) 85 } 86 87 // BeInPhase matches if pod.status.phase is the expected phase. 88 func BeInPhase(phase v1.PodPhase) types.GomegaMatcher { 89 // A simple implementation of this would be: 90 // return gomega.HaveField("Status.Phase", phase) 91 // 92 // But that produces a fairly generic 93 // Value for field 'Status.Phase' failed to satisfy matcher. 94 // failure message and doesn't show the pod. We can do better than 95 // that with a custom matcher. 96 97 return gcustom.MakeMatcher(func(pod *v1.Pod) (bool, error) { 98 return pod.Status.Phase == phase, nil 99 }).WithTemplate("Expected Pod {{.To}} be in {{format .Data}}\nGot instead:\n{{.FormattedActual}}").WithTemplateData(phase) 100 } 101 102 // WaitForAlmostAllReady waits up to timeout for the following conditions: 103 // 1. At least minPods Pods in Namespace ns are Running and Ready 104 // 2. All Pods in Namespace ns are either Ready or Succeeded 105 // 3. All Pods part of a ReplicaSet or ReplicationController in Namespace ns are Ready 106 // 107 // After the timeout has elapsed, an error is returned if the number of Pods in a Pending Phase 108 // is greater than allowedNotReadyPods. 109 // 110 // It is generally recommended to use WaitForPodsRunningReady instead of this function 111 // whenever possible, because its behavior is more intuitive. Similar to WaitForPodsRunningReady, 112 // this function requests the list of pods on every iteration, making it useful for situations 113 // where the set of Pods is likely changing, such as during cluster startup. 114 // 115 // If minPods or allowedNotReadyPods are -1, this method returns immediately 116 // without waiting. 117 func WaitForAlmostAllPodsReady(ctx context.Context, c clientset.Interface, ns string, minPods, allowedNotReadyPods int, timeout time.Duration) error { 118 if minPods == -1 || allowedNotReadyPods == -1 { 119 return nil 120 } 121 122 // We get the new list of pods, replication controllers, and replica 123 // sets in every iteration because more pods come online during startup 124 // and we want to ensure they are also checked. 125 // 126 // This struct gets populated while polling, then gets checked, and in 127 // case of a timeout is included in the failure message. 128 type state struct { 129 ReplicationControllers []v1.ReplicationController 130 ReplicaSets []appsv1.ReplicaSet 131 Pods []v1.Pod 132 } 133 134 nOk := 0 135 badPods := []v1.Pod{} 136 otherPods := []v1.Pod{} 137 succeededPods := []string{} 138 139 err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*state, error) { 140 141 rcList, err := c.CoreV1().ReplicationControllers(ns).List(ctx, metav1.ListOptions{}) 142 if err != nil { 143 return nil, fmt.Errorf("listing replication controllers in namespace %s: %w", ns, err) 144 } 145 rsList, err := c.AppsV1().ReplicaSets(ns).List(ctx, metav1.ListOptions{}) 146 if err != nil { 147 return nil, fmt.Errorf("listing replication sets in namespace %s: %w", ns, err) 148 } 149 podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{}) 150 if err != nil { 151 return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err) 152 } 153 return &state{ 154 ReplicationControllers: rcList.Items, 155 ReplicaSets: rsList.Items, 156 Pods: podList.Items, 157 }, nil 158 })).WithTimeout(timeout).Should(framework.MakeMatcher(func(s *state) (func() string, error) { 159 replicas, replicaOk := int32(0), int32(0) 160 for _, rc := range s.ReplicationControllers { 161 replicas += *rc.Spec.Replicas 162 replicaOk += rc.Status.ReadyReplicas 163 } 164 for _, rs := range s.ReplicaSets { 165 replicas += *rs.Spec.Replicas 166 replicaOk += rs.Status.ReadyReplicas 167 } 168 169 nOk = 0 170 badPods = []v1.Pod{} 171 otherPods = []v1.Pod{} 172 succeededPods = []string{} 173 for _, pod := range s.Pods { 174 res, err := testutils.PodRunningReady(&pod) 175 switch { 176 case res && err == nil: 177 nOk++ 178 case pod.Status.Phase == v1.PodSucceeded: 179 // it doesn't make sense to wait for this pod 180 succeededPods = append(succeededPods, pod.Name) 181 case pod.Status.Phase == v1.PodFailed: 182 // ignore failed pods that are controlled by some controller 183 if metav1.GetControllerOf(&pod) == nil { 184 badPods = append(badPods, pod) 185 } 186 default: 187 otherPods = append(otherPods, pod) 188 } 189 } 190 done := replicaOk == replicas && nOk >= minPods && (len(badPods)+len(otherPods)) == 0 191 if done { 192 return nil, nil 193 } 194 195 // Delayed formatting of a failure message. 196 return func() string { 197 var buffer strings.Builder 198 buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready (except for %d).\n", minPods, ns, allowedNotReadyPods)) 199 buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(s.Pods))) 200 buffer.WriteString(fmt.Sprintf("Expected %d pod replicas, %d are Running and Ready.\n", replicas, replicaOk)) 201 if len(succeededPods) > 0 { 202 buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1))) 203 } 204 if len(badPods) > 0 { 205 buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1))) 206 } 207 if len(otherPods) > 0 { 208 buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1))) 209 } 210 return buffer.String() 211 }, nil 212 })) 213 214 // An error might not be fatal. 215 if len(otherPods) <= allowedNotReadyPods { 216 return nil 217 } 218 return err 219 } 220 221 // WaitForPodsRunningReady waits up to timeout for the following conditions: 222 // 1. At least minPods Pods in Namespace ns are Running and Ready 223 // 2. No Pods in Namespace ns are Failed and not owned by a controller or Pending 224 // 225 // An error is returned if either of these conditions are not met within the timeout. 226 // 227 // It has separate behavior from other 'wait for' pods functions in 228 // that it requests the list of pods on every iteration. This is useful, for 229 // example, in cluster startup, because the number of pods increases while 230 // waiting. All pods that are in SUCCESS state are not counted. 231 func WaitForPodsRunningReady(ctx context.Context, c clientset.Interface, ns string, minPods int, timeout time.Duration) error { 232 233 return framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) ([]v1.Pod, error) { 234 235 podList, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{}) 236 if err != nil { 237 return nil, fmt.Errorf("listing pods in namespace %s: %w", ns, err) 238 } 239 return podList.Items, nil 240 })).WithTimeout(timeout).Should(framework.MakeMatcher(func(pods []v1.Pod) (func() string, error) { 241 242 nOk := 0 243 badPods := []v1.Pod{} 244 otherPods := []v1.Pod{} 245 succeededPods := []string{} 246 247 for _, pod := range pods { 248 res, err := testutils.PodRunningReady(&pod) 249 switch { 250 case res && err == nil: 251 nOk++ 252 case pod.Status.Phase == v1.PodSucceeded: 253 // ignore succeeded pods 254 succeededPods = append(succeededPods, pod.Name) 255 case pod.Status.Phase == v1.PodFailed: 256 // ignore failed pods that are controlled by some controller 257 if metav1.GetControllerOf(&pod) == nil { 258 badPods = append(badPods, pod) 259 } 260 default: 261 otherPods = append(otherPods, pod) 262 } 263 } 264 if nOk >= minPods && len(badPods)+len(otherPods) == 0 { 265 return nil, nil 266 } 267 268 // Delayed formatting of a failure message. 269 return func() string { 270 var buffer strings.Builder 271 buffer.WriteString(fmt.Sprintf("Expected all pods (need at least %d) in namespace %q to be running and ready \n", minPods, ns)) 272 buffer.WriteString(fmt.Sprintf("%d / %d pods were running and ready.\n", nOk, len(pods))) 273 if len(succeededPods) > 0 { 274 buffer.WriteString(fmt.Sprintf("Pods that completed successfully:\n%s", format.Object(succeededPods, 1))) 275 } 276 if len(badPods) > 0 { 277 buffer.WriteString(fmt.Sprintf("Pods that failed and were not controlled by some controller:\n%s", format.Object(badPods, 1))) 278 } 279 if len(otherPods) > 0 { 280 buffer.WriteString(fmt.Sprintf("Pods that were neither completed nor running:\n%s", format.Object(otherPods, 1))) 281 } 282 return buffer.String() 283 }, nil 284 })) 285 286 } 287 288 // WaitForPodCondition waits a pods to be matched to the given condition. 289 // The condition callback may use gomega.StopTrying to abort early. 290 func WaitForPodCondition(ctx context.Context, c clientset.Interface, ns, podName, conditionDesc string, timeout time.Duration, condition podCondition) error { 291 return framework.Gomega(). 292 Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(ns).Get, podName, metav1.GetOptions{}))). 293 WithTimeout(timeout). 294 Should(framework.MakeMatcher(func(pod *v1.Pod) (func() string, error) { 295 done, err := condition(pod) 296 if err != nil { 297 return nil, err 298 } 299 if done { 300 return nil, nil 301 } 302 return func() string { 303 return fmt.Sprintf("expected pod to be %s, got instead:\n%s", conditionDesc, format.Object(pod, 1)) 304 }, nil 305 })) 306 } 307 308 // Range determines how many items must exist and how many must match a certain 309 // condition. Values <= 0 are ignored. 310 // TODO (?): move to test/e2e/framework/range 311 type Range struct { 312 // MinMatching must be <= actual matching items or <= 0. 313 MinMatching int 314 // MaxMatching must be >= actual matching items or <= 0. 315 // To check for "no matching items", set NonMatching. 316 MaxMatching int 317 // NoneMatching indicates that no item must match. 318 NoneMatching bool 319 // AllMatching indicates that all items must match. 320 AllMatching bool 321 // MinFound must be <= existing items or <= 0. 322 MinFound int 323 } 324 325 // Min returns how many items must exist. 326 func (r Range) Min() int { 327 min := r.MinMatching 328 if min < r.MinFound { 329 min = r.MinFound 330 } 331 return min 332 } 333 334 // WaitForPods waits for pods in the given namespace to match the given 335 // condition. How many pods must exist and how many must match the condition 336 // is determined by the range parameter. The condition callback may use 337 // gomega.StopTrying(...).Now() to abort early. The condition description 338 // will be used with "expected pods to <description>". 339 func WaitForPods(ctx context.Context, c clientset.Interface, ns string, opts metav1.ListOptions, r Range, timeout time.Duration, conditionDesc string, condition func(*v1.Pod) bool) (*v1.PodList, error) { 340 var finalPods *v1.PodList 341 minPods := r.Min() 342 match := func(pods *v1.PodList) (func() string, error) { 343 finalPods = pods 344 345 if len(pods.Items) < minPods { 346 return func() string { 347 return fmt.Sprintf("expected at least %d pods, only got %d", minPods, len(pods.Items)) 348 }, nil 349 } 350 351 var nonMatchingPods, matchingPods []v1.Pod 352 for _, pod := range pods.Items { 353 if condition(&pod) { 354 matchingPods = append(matchingPods, pod) 355 } else { 356 nonMatchingPods = append(nonMatchingPods, pod) 357 } 358 } 359 matching := len(pods.Items) - len(nonMatchingPods) 360 if matching < r.MinMatching && r.MinMatching > 0 { 361 return func() string { 362 return fmt.Sprintf("expected at least %d pods to %s, %d out of %d were not:\n%s", 363 r.MinMatching, conditionDesc, len(nonMatchingPods), len(pods.Items), 364 format.Object(nonMatchingPods, 1)) 365 }, nil 366 } 367 if len(nonMatchingPods) > 0 && r.AllMatching { 368 return func() string { 369 return fmt.Sprintf("expected all pods to %s, %d out of %d were not:\n%s", 370 conditionDesc, len(nonMatchingPods), len(pods.Items), 371 format.Object(nonMatchingPods, 1)) 372 }, nil 373 } 374 if matching > r.MaxMatching && r.MaxMatching > 0 { 375 return func() string { 376 return fmt.Sprintf("expected at most %d pods to %s, %d out of %d were:\n%s", 377 r.MinMatching, conditionDesc, len(matchingPods), len(pods.Items), 378 format.Object(matchingPods, 1)) 379 }, nil 380 } 381 if matching > 0 && r.NoneMatching { 382 return func() string { 383 return fmt.Sprintf("expected no pods to %s, %d out of %d were:\n%s", 384 conditionDesc, len(matchingPods), len(pods.Items), 385 format.Object(matchingPods, 1)) 386 }, nil 387 } 388 return nil, nil 389 } 390 391 err := framework.Gomega(). 392 Eventually(ctx, framework.ListObjects(c.CoreV1().Pods(ns).List, opts)). 393 WithTimeout(timeout). 394 Should(framework.MakeMatcher(match)) 395 return finalPods, err 396 } 397 398 // RunningReady checks whether pod p's phase is running and it has a ready 399 // condition of status true. 400 func RunningReady(p *v1.Pod) bool { 401 return p.Status.Phase == v1.PodRunning && podutils.IsPodReady(p) 402 } 403 404 // WaitForPodsRunning waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` are running. 405 func WaitForPodsRunning(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error { 406 _, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 407 "be running and ready", func(pod *v1.Pod) bool { 408 ready, _ := testutils.PodRunningReady(pod) 409 return ready 410 }) 411 return err 412 } 413 414 // WaitForPodsSchedulingGated waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` stay in scheduling gated state. 415 func WaitForPodsSchedulingGated(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) error { 416 _, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 417 "be in scheduling gated state", func(pod *v1.Pod) bool { 418 for _, condition := range pod.Status.Conditions { 419 if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonSchedulingGated { 420 return true 421 } 422 } 423 return false 424 }) 425 return err 426 } 427 428 // WaitForPodsWithSchedulingGates waits for a given `timeout` to evaluate if a certain amount of pods in given `ns` 429 // match the given `schedulingGates`stay in scheduling gated state. 430 func WaitForPodsWithSchedulingGates(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration, schedulingGates []v1.PodSchedulingGate) error { 431 _, err := WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, timeout, 432 "have certain scheduling gates", func(pod *v1.Pod) bool { 433 return reflect.DeepEqual(pod.Spec.SchedulingGates, schedulingGates) 434 }) 435 return err 436 } 437 438 // WaitForPodTerminatedInNamespace returns an error if it takes too long for the pod to terminate, 439 // if the pod Get api returns an error (IsNotFound or other), or if the pod failed (and thus did not 440 // terminate) with an unexpected reason. Typically called to test that the passed-in pod is fully 441 // terminated (reason==""), but may be called to detect if a pod did *not* terminate according to 442 // the supplied reason. 443 func WaitForPodTerminatedInNamespace(ctx context.Context, c clientset.Interface, podName, reason, namespace string) error { 444 return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("terminated with reason %s", reason), podStartTimeout, func(pod *v1.Pod) (bool, error) { 445 // Only consider Failed pods. Successful pods will be deleted and detected in 446 // waitForPodCondition's Get call returning `IsNotFound` 447 if pod.Status.Phase == v1.PodFailed { 448 if pod.Status.Reason == reason { // short-circuit waitForPodCondition's loop 449 return true, nil 450 } 451 return true, fmt.Errorf("Expected pod %q in namespace %q to be terminated with reason %q, got reason: %q", podName, namespace, reason, pod.Status.Reason) 452 } 453 return false, nil 454 }) 455 } 456 457 // WaitForPodTerminatingInNamespaceTimeout returns if the pod is terminating, or an error if it is not after the timeout. 458 func WaitForPodTerminatingInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 459 return WaitForPodCondition(ctx, c, namespace, podName, "is terminating", timeout, func(pod *v1.Pod) (bool, error) { 460 if pod.DeletionTimestamp != nil { 461 return true, nil 462 } 463 return false, nil 464 }) 465 } 466 467 // WaitForPodSuccessInNamespaceTimeout returns nil if the pod reached state success, or an error if it reached failure or ran too long. 468 func WaitForPodSuccessInNamespaceTimeout(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 469 return WaitForPodCondition(ctx, c, namespace, podName, fmt.Sprintf("%s or %s", v1.PodSucceeded, v1.PodFailed), timeout, func(pod *v1.Pod) (bool, error) { 470 if pod.DeletionTimestamp == nil && pod.Spec.RestartPolicy == v1.RestartPolicyAlways { 471 return true, gomega.StopTrying(fmt.Sprintf("pod %q will never terminate with a succeeded state since its restart policy is Always", podName)) 472 } 473 switch pod.Status.Phase { 474 case v1.PodSucceeded: 475 ginkgo.By("Saw pod success") 476 return true, nil 477 case v1.PodFailed: 478 return true, gomega.StopTrying(fmt.Sprintf("pod %q failed with status: %+v", podName, pod.Status)) 479 default: 480 return false, nil 481 } 482 }) 483 } 484 485 // WaitForPodNameUnschedulableInNamespace returns an error if it takes too long for the pod to become Pending 486 // and have condition Status equal to Unschedulable, 487 // if the pod Get api returns an error (IsNotFound or other), or if the pod failed with an unexpected reason. 488 // Typically called to test that the passed-in pod is Pending and Unschedulable. 489 func WaitForPodNameUnschedulableInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 490 return WaitForPodCondition(ctx, c, namespace, podName, v1.PodReasonUnschedulable, podStartTimeout, func(pod *v1.Pod) (bool, error) { 491 // Only consider Failed pods. Successful pods will be deleted and detected in 492 // waitForPodCondition's Get call returning `IsNotFound` 493 if pod.Status.Phase == v1.PodPending { 494 for _, cond := range pod.Status.Conditions { 495 if cond.Type == v1.PodScheduled && cond.Status == v1.ConditionFalse && cond.Reason == v1.PodReasonUnschedulable { 496 return true, nil 497 } 498 } 499 } 500 if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { 501 return true, fmt.Errorf("Expected pod %q in namespace %q to be in phase Pending, but got phase: %v", podName, namespace, pod.Status.Phase) 502 } 503 return false, nil 504 }) 505 } 506 507 // WaitForPodNameRunningInNamespace waits default amount of time (PodStartTimeout) for the specified pod to become running. 508 // Returns an error if timeout occurs first, or pod goes in to failed state. 509 func WaitForPodNameRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 510 return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, podStartTimeout) 511 } 512 513 // WaitForPodRunningInNamespaceSlow waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running. 514 // The resourceVersion is used when Watching object changes, it tells since when we care 515 // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state. 516 func WaitForPodRunningInNamespaceSlow(ctx context.Context, c clientset.Interface, podName, namespace string) error { 517 return WaitTimeoutForPodRunningInNamespace(ctx, c, podName, namespace, slowPodStartTimeout) 518 } 519 520 // WaitTimeoutForPodRunningInNamespace waits the given timeout duration for the specified pod to become running. 521 // It does not need to exist yet when this function gets called and the pod is not expected to be recreated 522 // when it succeeds or fails. 523 func WaitTimeoutForPodRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 524 return framework.Gomega().Eventually(ctx, framework.RetryNotFound(framework.GetObject(c.CoreV1().Pods(namespace).Get, podName, metav1.GetOptions{}))). 525 WithTimeout(timeout). 526 Should(BeRunningNoRetries()) 527 } 528 529 // WaitForPodRunningInNamespace waits default amount of time (podStartTimeout) for the specified pod to become running. 530 // Returns an error if timeout occurs first, or pod goes in to failed state. 531 func WaitForPodRunningInNamespace(ctx context.Context, c clientset.Interface, pod *v1.Pod) error { 532 if pod.Status.Phase == v1.PodRunning { 533 return nil 534 } 535 return WaitTimeoutForPodRunningInNamespace(ctx, c, pod.Name, pod.Namespace, podStartTimeout) 536 } 537 538 // WaitTimeoutForPodNoLongerRunningInNamespace waits the given timeout duration for the specified pod to stop. 539 func WaitTimeoutForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 540 return WaitForPodCondition(ctx, c, namespace, podName, "completed", timeout, func(pod *v1.Pod) (bool, error) { 541 switch pod.Status.Phase { 542 case v1.PodFailed, v1.PodSucceeded: 543 return true, nil 544 } 545 return false, nil 546 }) 547 } 548 549 // WaitForPodNoLongerRunningInNamespace waits default amount of time (defaultPodDeletionTimeout) for the specified pod to stop running. 550 // Returns an error if timeout occurs first. 551 func WaitForPodNoLongerRunningInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string) error { 552 return WaitTimeoutForPodNoLongerRunningInNamespace(ctx, c, podName, namespace, defaultPodDeletionTimeout) 553 } 554 555 // WaitTimeoutForPodReadyInNamespace waits the given timeout duration for the 556 // specified pod to be ready and running. 557 func WaitTimeoutForPodReadyInNamespace(ctx context.Context, c clientset.Interface, podName, namespace string, timeout time.Duration) error { 558 return WaitForPodCondition(ctx, c, namespace, podName, "running and ready", timeout, func(pod *v1.Pod) (bool, error) { 559 switch pod.Status.Phase { 560 case v1.PodFailed, v1.PodSucceeded: 561 return false, gomega.StopTrying(fmt.Sprintf("The phase of Pod %s is %s which is unexpected.", pod.Name, pod.Status.Phase)) 562 case v1.PodRunning: 563 return podutils.IsPodReady(pod), nil 564 } 565 return false, nil 566 }) 567 } 568 569 // WaitForPodNotPending returns an error if it took too long for the pod to go out of pending state. 570 // The resourceVersion is used when Watching object changes, it tells since when we care 571 // about changes to the pod. 572 func WaitForPodNotPending(ctx context.Context, c clientset.Interface, ns, podName string) error { 573 return WaitForPodCondition(ctx, c, ns, podName, "not pending", podStartTimeout, func(pod *v1.Pod) (bool, error) { 574 switch pod.Status.Phase { 575 case v1.PodPending: 576 return false, nil 577 default: 578 return true, nil 579 } 580 }) 581 } 582 583 // WaitForPodSuccessInNamespace returns nil if the pod reached state success, or an error if it reached failure or until podStartupTimeout. 584 func WaitForPodSuccessInNamespace(ctx context.Context, c clientset.Interface, podName string, namespace string) error { 585 return WaitForPodSuccessInNamespaceTimeout(ctx, c, podName, namespace, podStartTimeout) 586 } 587 588 // WaitForPodNotFoundInNamespace returns an error if it takes too long for the pod to fully terminate. 589 // Unlike `waitForPodTerminatedInNamespace`, the pod's Phase and Reason are ignored. If the pod Get 590 // api returns IsNotFound then the wait stops and nil is returned. If the Get api returns an error other 591 // than "not found" and that error is final, that error is returned and the wait stops. 592 func WaitForPodNotFoundInNamespace(ctx context.Context, c clientset.Interface, podName, ns string, timeout time.Duration) error { 593 err := framework.Gomega().Eventually(ctx, framework.HandleRetry(func(ctx context.Context) (*v1.Pod, error) { 594 pod, err := c.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{}) 595 if apierrors.IsNotFound(err) { 596 return nil, nil 597 } 598 return pod, err 599 })).WithTimeout(timeout).Should(gomega.BeNil()) 600 if err != nil { 601 return fmt.Errorf("expected pod to not be found: %w", err) 602 } 603 return nil 604 } 605 606 // WaitForPodsResponding waits for the pods to response. 607 func WaitForPodsResponding(ctx context.Context, c clientset.Interface, ns string, controllerName string, wantName bool, timeout time.Duration, pods *v1.PodList) error { 608 if timeout == 0 { 609 timeout = podRespondingTimeout 610 } 611 ginkgo.By("trying to dial each unique pod") 612 label := labels.SelectorFromSet(labels.Set(map[string]string{"name": controllerName})) 613 options := metav1.ListOptions{LabelSelector: label.String()} 614 615 type response struct { 616 podName string 617 response string 618 } 619 620 get := func(ctx context.Context) ([]response, error) { 621 currentPods, err := c.CoreV1().Pods(ns).List(ctx, options) 622 if err != nil { 623 return nil, fmt.Errorf("list pods: %w", err) 624 } 625 626 var responses []response 627 for _, pod := range pods.Items { 628 // Check that the replica list remains unchanged, otherwise we have problems. 629 if !isElementOf(pod.UID, currentPods) { 630 return nil, gomega.StopTrying(fmt.Sprintf("Pod with UID %s is no longer a member of the replica set. Must have been restarted for some reason.\nCurrent replica set:\n%s", pod.UID, format.Object(currentPods, 1))) 631 } 632 633 ctxUntil, cancel := context.WithTimeout(ctx, singleCallTimeout) 634 defer cancel() 635 636 body, err := c.CoreV1().RESTClient().Get(). 637 Namespace(ns). 638 Resource("pods"). 639 SubResource("proxy"). 640 Name(string(pod.Name)). 641 Do(ctxUntil). 642 Raw() 643 644 if err != nil { 645 // We may encounter errors here because of a race between the pod readiness and apiserver 646 // proxy or because of temporary failures. The error gets wrapped for framework.HandleRetry. 647 // Gomega+Ginkgo will handle logging. 648 return nil, fmt.Errorf("controller %s: failed to Get from replica pod %s:\n%w\nPod status:\n%s", 649 controllerName, pod.Name, 650 err, format.Object(pod.Status, 1)) 651 } 652 responses = append(responses, response{podName: pod.Name, response: string(body)}) 653 } 654 return responses, nil 655 } 656 657 match := func(responses []response) (func() string, error) { 658 // The response checker expects the pod's name unless !respondName, in 659 // which case it just checks for a non-empty response. 660 var unexpected []response 661 for _, response := range responses { 662 if wantName { 663 if response.response != response.podName { 664 unexpected = append(unexpected, response) 665 } 666 } else { 667 if len(response.response) == 0 { 668 unexpected = append(unexpected, response) 669 } 670 } 671 } 672 if len(unexpected) > 0 { 673 return func() string { 674 what := "some response" 675 if wantName { 676 what = "the pod's own name as response" 677 } 678 return fmt.Sprintf("Wanted %s, but the following pods replied with something else:\n%s", what, format.Object(unexpected, 1)) 679 }, nil 680 } 681 return nil, nil 682 } 683 684 err := framework.Gomega(). 685 Eventually(ctx, framework.HandleRetry(get)). 686 WithTimeout(timeout). 687 Should(framework.MakeMatcher(match)) 688 if err != nil { 689 return fmt.Errorf("checking pod responses: %w", err) 690 } 691 return nil 692 } 693 694 func isElementOf(podUID apitypes.UID, pods *v1.PodList) bool { 695 for _, pod := range pods.Items { 696 if pod.UID == podUID { 697 return true 698 } 699 } 700 return false 701 } 702 703 // WaitForNumberOfPods waits up to timeout to ensure there are exact 704 // `num` pods in namespace `ns`. 705 // It returns the matching Pods or a timeout error. 706 func WaitForNumberOfPods(ctx context.Context, c clientset.Interface, ns string, num int, timeout time.Duration) (pods *v1.PodList, err error) { 707 return WaitForPods(ctx, c, ns, metav1.ListOptions{}, Range{MinMatching: num, MaxMatching: num}, podScheduledBeforeTimeout, "exist", func(pod *v1.Pod) bool { 708 return true 709 }) 710 } 711 712 // WaitForPodsWithLabelScheduled waits for all matching pods to become scheduled and at least one 713 // matching pod exists. Return the list of matching pods. 714 func WaitForPodsWithLabelScheduled(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (pods *v1.PodList, err error) { 715 opts := metav1.ListOptions{LabelSelector: label.String()} 716 return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1, AllMatching: true}, podScheduledBeforeTimeout, "be scheduled", func(pod *v1.Pod) bool { 717 return pod.Spec.NodeName != "" 718 }) 719 } 720 721 // WaitForPodsWithLabel waits up to podListTimeout for getting pods with certain label 722 func WaitForPodsWithLabel(ctx context.Context, c clientset.Interface, ns string, label labels.Selector) (*v1.PodList, error) { 723 opts := metav1.ListOptions{LabelSelector: label.String()} 724 return WaitForPods(ctx, c, ns, opts, Range{MinFound: 1}, podListTimeout, "exist", func(pod *v1.Pod) bool { 725 return true 726 }) 727 } 728 729 // WaitForPodsWithLabelRunningReady waits for exact amount of matching pods to become running and ready. 730 // Return the list of matching pods. 731 func WaitForPodsWithLabelRunningReady(ctx context.Context, c clientset.Interface, ns string, label labels.Selector, num int, timeout time.Duration) (pods *v1.PodList, err error) { 732 opts := metav1.ListOptions{LabelSelector: label.String()} 733 return WaitForPods(ctx, c, ns, opts, Range{MinFound: num, AllMatching: true}, timeout, "be running and ready", RunningReady) 734 } 735 736 // WaitForNRestartablePods tries to list restarting pods using ps until it finds expect of them, 737 // returning their names if it can do so before timeout. 738 func WaitForNRestartablePods(ctx context.Context, ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) { 739 var pods []*v1.Pod 740 741 get := func(ctx context.Context) ([]*v1.Pod, error) { 742 return ps.List(), nil 743 } 744 745 match := func(allPods []*v1.Pod) (func() string, error) { 746 pods = FilterNonRestartablePods(allPods) 747 if len(pods) != expect { 748 return func() string { 749 return fmt.Sprintf("expected to find non-restartable %d pods, but found %d:\n%s", expect, len(pods), format.Object(pods, 1)) 750 }, nil 751 } 752 return nil, nil 753 } 754 755 err := framework.Gomega(). 756 Eventually(ctx, framework.HandleRetry(get)). 757 WithTimeout(timeout). 758 Should(framework.MakeMatcher(match)) 759 if err != nil { 760 return nil, err 761 } 762 763 podNames := make([]string, len(pods)) 764 for i, p := range pods { 765 podNames[i] = p.Name 766 } 767 return podNames, nil 768 } 769 770 // WaitForPodContainerToFail waits for the given Pod container to fail with the given reason, specifically due to 771 // invalid container configuration. In this case, the container will remain in a waiting state with a specific 772 // reason set, which should match the given reason. 773 func WaitForPodContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error { 774 conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason) 775 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 776 switch pod.Status.Phase { 777 case v1.PodPending: 778 if len(pod.Status.ContainerStatuses) == 0 { 779 return false, nil 780 } 781 containerStatus := pod.Status.ContainerStatuses[containerIndex] 782 if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason { 783 return true, nil 784 } 785 return false, nil 786 case v1.PodFailed, v1.PodRunning, v1.PodSucceeded: 787 return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase) 788 } 789 return false, nil 790 }) 791 } 792 793 // WaitForPodScheduled waits for the pod to be schedule, ie. the .spec.nodeName is set 794 func WaitForPodScheduled(ctx context.Context, c clientset.Interface, namespace, podName string) error { 795 return WaitForPodCondition(ctx, c, namespace, podName, "pod is scheduled", podScheduledBeforeTimeout, func(pod *v1.Pod) (bool, error) { 796 return pod.Spec.NodeName != "", nil 797 }) 798 } 799 800 // WaitForPodContainerStarted waits for the given Pod container to start, after a successful run of the startupProbe. 801 func WaitForPodContainerStarted(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, timeout time.Duration) error { 802 conditionDesc := fmt.Sprintf("container %d started", containerIndex) 803 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 804 if containerIndex > len(pod.Status.ContainerStatuses)-1 { 805 return false, nil 806 } 807 containerStatus := pod.Status.ContainerStatuses[containerIndex] 808 return *containerStatus.Started, nil 809 }) 810 } 811 812 // WaitForPodFailedReason wait for pod failed reason in status, for example "SysctlForbidden". 813 func WaitForPodFailedReason(ctx context.Context, c clientset.Interface, pod *v1.Pod, reason string, timeout time.Duration) error { 814 conditionDesc := fmt.Sprintf("failed with reason %s", reason) 815 return WaitForPodCondition(ctx, c, pod.Namespace, pod.Name, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 816 switch pod.Status.Phase { 817 case v1.PodSucceeded: 818 return true, errors.New("pod succeeded unexpectedly") 819 case v1.PodFailed: 820 if pod.Status.Reason == reason { 821 return true, nil 822 } else { 823 return true, fmt.Errorf("pod failed with reason %s", pod.Status.Reason) 824 } 825 } 826 return false, nil 827 }) 828 } 829 830 // WaitForContainerRunning waits for the given Pod container to have a state of running 831 func WaitForContainerRunning(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error { 832 conditionDesc := fmt.Sprintf("container %s running", containerName) 833 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 834 for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { 835 for _, cs := range statuses { 836 if cs.Name == containerName { 837 return cs.State.Running != nil, nil 838 } 839 } 840 } 841 return false, nil 842 }) 843 } 844 845 // WaitForContainerTerminated waits for the given Pod container to have a state of terminated 846 func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration) error { 847 conditionDesc := fmt.Sprintf("container %s terminated", containerName) 848 return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { 849 for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { 850 for _, cs := range statuses { 851 if cs.Name == containerName { 852 return cs.State.Terminated != nil, nil 853 } 854 } 855 } 856 return false, nil 857 }) 858 }