k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/common/node/container_probe.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package node 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "net/url" 24 "strings" 25 "time" 26 27 v1 "k8s.io/api/core/v1" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/fields" 30 "k8s.io/apimachinery/pkg/runtime" 31 "k8s.io/apimachinery/pkg/util/intstr" 32 "k8s.io/apimachinery/pkg/util/uuid" 33 "k8s.io/apimachinery/pkg/watch" 34 clientset "k8s.io/client-go/kubernetes" 35 "k8s.io/client-go/tools/cache" 36 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 37 "k8s.io/kubernetes/pkg/kubelet/events" 38 "k8s.io/kubernetes/test/e2e/feature" 39 "k8s.io/kubernetes/test/e2e/framework" 40 e2eevents "k8s.io/kubernetes/test/e2e/framework/events" 41 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 42 "k8s.io/kubernetes/test/e2e/nodefeature" 43 testutils "k8s.io/kubernetes/test/utils" 44 imageutils "k8s.io/kubernetes/test/utils/image" 45 admissionapi "k8s.io/pod-security-admission/api" 46 47 "github.com/onsi/ginkgo/v2" 48 "github.com/onsi/gomega" 49 ) 50 51 const ( 52 probeTestInitialDelaySeconds = 15 53 54 defaultObservationTimeout = time.Minute * 4 55 ) 56 57 var _ = SIGDescribe("Probing container", func() { 58 f := framework.NewDefaultFramework("container-probe") 59 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 60 var podClient *e2epod.PodClient 61 probe := webserverProbeBuilder{} 62 63 ginkgo.BeforeEach(func() { 64 podClient = e2epod.NewPodClient(f) 65 }) 66 67 /* 68 Release: v1.9 69 Testname: Pod readiness probe, with initial delay 70 Description: Create a Pod that is configured with a initial delay set on the readiness probe. Check the Pod Start time to compare to the initial delay. The Pod MUST be ready only after the specified initial delay. 71 */ 72 framework.ConformanceIt("with readiness probe should not be ready before initial delay and never restart", f.WithNodeConformance(), func(ctx context.Context) { 73 containerName := "test-webserver" 74 p := podClient.Create(ctx, testWebServerPodSpec(probe.withInitialDelay().build(), nil, containerName, 80)) 75 framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout)) 76 77 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 78 framework.ExpectNoError(err) 79 isReady, err := testutils.PodRunningReady(p) 80 framework.ExpectNoError(err) 81 if !isReady { 82 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 83 } 84 85 // We assume the pod became ready when the container became ready. This 86 // is true for a single container pod. 87 readyTime, err := GetTransitionTimeForReadyCondition(p) 88 framework.ExpectNoError(err) 89 startedTime, err := GetContainerStartedTime(p, containerName) 90 framework.ExpectNoError(err) 91 92 framework.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) 93 initialDelay := probeTestInitialDelaySeconds * time.Second 94 if readyTime.Sub(startedTime) < initialDelay { 95 framework.Failf("Pod became ready before it's %v initial delay", initialDelay) 96 } 97 98 restartCount := getRestartCount(p) 99 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 100 }) 101 102 /* 103 Release: v1.9 104 Testname: Pod readiness probe, failure 105 Description: Create a Pod with a readiness probe that fails consistently. When this Pod is created, 106 then the Pod MUST never be ready, never be running and restart count MUST be zero. 107 */ 108 framework.ConformanceIt("with readiness probe that fails should never be ready and never restart", f.WithNodeConformance(), func(ctx context.Context) { 109 p := podClient.Create(ctx, testWebServerPodSpec(probe.withFailing().build(), nil, "test-webserver", 80)) 110 gomega.Consistently(ctx, func() (bool, error) { 111 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 112 if err != nil { 113 return false, err 114 } 115 return podutil.IsPodReady(p), nil 116 }, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(), "pod should not be ready") 117 118 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 119 framework.ExpectNoError(err) 120 121 isReady, _ := testutils.PodRunningReady(p) 122 if isReady { 123 framework.Failf("pod %s/%s should be not ready", f.Namespace.Name, p.Name) 124 } 125 126 restartCount := getRestartCount(p) 127 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 128 }) 129 130 /* 131 Release: v1.9 132 Testname: Pod liveness probe, using local file, restart 133 Description: Create a Pod with liveness probe that uses ExecAction handler to cat /temp/health file. The Container deletes the file /temp/health after 10 second, triggering liveness probe to fail. The Pod MUST now be killed and restarted incrementing restart count to 1. 134 */ 135 framework.ConformanceIt("should be restarted with a exec \"cat /tmp/health\" liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 136 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 10; rm -rf /tmp/health; sleep 600"} 137 livenessProbe := &v1.Probe{ 138 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 139 InitialDelaySeconds: 15, 140 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 141 FailureThreshold: 1, 142 } 143 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 144 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 145 }) 146 147 /* 148 Release: v1.9 149 Testname: Pod liveness probe, using local file, no restart 150 Description: Pod is created with liveness probe that uses 'exec' command to cat /temp/health file. Liveness probe MUST not fail to check health and the restart count should remain 0. 151 */ 152 framework.ConformanceIt("should *not* be restarted with a exec \"cat /tmp/health\" liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 153 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 600"} 154 livenessProbe := &v1.Probe{ 155 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 156 InitialDelaySeconds: 15, 157 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 158 FailureThreshold: 1, 159 } 160 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 161 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 162 }) 163 164 /* 165 Release: v1.9 166 Testname: Pod liveness probe, using http endpoint, restart 167 Description: A Pod is created with liveness probe on http endpoint /healthz. The http handler on the /healthz will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. 168 */ 169 framework.ConformanceIt("should be restarted with a /healthz http liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 170 livenessProbe := &v1.Probe{ 171 ProbeHandler: httpGetHandler("/healthz", 8080), 172 InitialDelaySeconds: 15, 173 FailureThreshold: 1, 174 } 175 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 176 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 177 }) 178 179 /* 180 Release: v1.18 181 Testname: Pod liveness probe, using tcp socket, no restart 182 Description: A Pod is created with liveness probe on tcp socket 8080. The http handler on port 8080 will return http errors after 10 seconds, but the socket will remain open. Liveness probe MUST not fail to check health and the restart count should remain 0. 183 */ 184 framework.ConformanceIt("should *not* be restarted with a tcp:8080 liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 185 livenessProbe := &v1.Probe{ 186 ProbeHandler: tcpSocketHandler(8080), 187 InitialDelaySeconds: 15, 188 FailureThreshold: 1, 189 } 190 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 191 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 192 }) 193 194 /* 195 Release: v1.9 196 Testname: Pod liveness probe, using http endpoint, multiple restarts (slow) 197 Description: A Pod is created with liveness probe on http endpoint /healthz. The http handler on the /healthz will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. The liveness probe must fail again after restart once the http handler for /healthz enpoind on the Pod returns an http error after 10 seconds from the start. Restart counts MUST increment every time health check fails, measure up to 5 restart. 198 */ 199 framework.ConformanceIt("should have monotonically increasing restart count", f.WithNodeConformance(), func(ctx context.Context) { 200 livenessProbe := &v1.Probe{ 201 ProbeHandler: httpGetHandler("/healthz", 8080), 202 InitialDelaySeconds: 5, 203 FailureThreshold: 1, 204 } 205 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 206 // ~2 minutes backoff timeouts + 4 minutes defaultObservationTimeout + 2 minutes for each pod restart 207 RunLivenessTest(ctx, f, pod, 5, 2*time.Minute+defaultObservationTimeout+4*2*time.Minute) 208 }) 209 210 /* 211 Release: v1.9 212 Testname: Pod liveness probe, using http endpoint, failure 213 Description: A Pod is created with liveness probe on http endpoint '/'. Liveness probe on this endpoint will not fail. When liveness probe does not fail then the restart count MUST remain zero. 214 */ 215 framework.ConformanceIt("should *not* be restarted with a /healthz http liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 216 livenessProbe := &v1.Probe{ 217 ProbeHandler: httpGetHandler("/", 80), 218 InitialDelaySeconds: 15, 219 TimeoutSeconds: 5, 220 FailureThreshold: 5, // to accommodate nodes which are slow in bringing up containers. 221 } 222 pod := testWebServerPodSpec(nil, livenessProbe, "test-webserver", 80) 223 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 224 }) 225 226 /* 227 Release: v1.9 228 Testname: Pod liveness probe, container exec timeout, restart 229 Description: A Pod is created with liveness probe with a Exec action on the Pod. If the liveness probe call does not return within the timeout specified, liveness probe MUST restart the Pod. 230 */ 231 f.It("should be restarted with an exec liveness probe with timeout [MinimumKubeletVersion:1.20]", f.WithNodeConformance(), func(ctx context.Context) { 232 cmd := []string{"/bin/sh", "-c", "sleep 600"} 233 livenessProbe := &v1.Probe{ 234 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 235 InitialDelaySeconds: 15, 236 TimeoutSeconds: 1, 237 FailureThreshold: 1, 238 } 239 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 240 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 241 }) 242 243 /* 244 Release: v1.20 245 Testname: Pod readiness probe, container exec timeout, not ready 246 Description: A Pod is created with readiness probe with a Exec action on the Pod. If the readiness probe call does not return within the timeout specified, readiness probe MUST not be Ready. 247 */ 248 f.It("should not be ready with an exec readiness probe timeout [MinimumKubeletVersion:1.20]", f.WithNodeConformance(), func(ctx context.Context) { 249 cmd := []string{"/bin/sh", "-c", "sleep 600"} 250 readinessProbe := &v1.Probe{ 251 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 252 InitialDelaySeconds: 15, 253 TimeoutSeconds: 1, 254 FailureThreshold: 1, 255 } 256 pod := busyBoxPodSpec(readinessProbe, nil, cmd) 257 runReadinessFailTest(ctx, f, pod, time.Minute, true) 258 }) 259 260 /* 261 Release: v1.21 262 Testname: Pod liveness probe, container exec timeout, restart 263 Description: A Pod is created with liveness probe with a Exec action on the Pod. If the liveness probe call does not return within the timeout specified, liveness probe MUST restart the Pod. When ExecProbeTimeout feature gate is disabled and cluster is using dockershim, the timeout is ignored BUT a failing liveness probe MUST restart the Pod. 264 */ 265 ginkgo.It("should be restarted with a failing exec liveness probe that took longer than the timeout", func(ctx context.Context) { 266 cmd := []string{"/bin/sh", "-c", "sleep 600"} 267 livenessProbe := &v1.Probe{ 268 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10 & exit 1"}), 269 InitialDelaySeconds: 15, 270 TimeoutSeconds: 1, 271 FailureThreshold: 1, 272 } 273 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 274 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 275 }) 276 277 /* 278 Release: v1.14 279 Testname: Pod http liveness probe, redirected to a local address 280 Description: A Pod is created with liveness probe on http endpoint /redirect?loc=healthz. The http handler on the /redirect will redirect to the /healthz endpoint, which will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. 281 */ 282 ginkgo.It("should be restarted with a local redirect http liveness probe", func(ctx context.Context) { 283 livenessProbe := &v1.Probe{ 284 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("/healthz"), 8080), 285 InitialDelaySeconds: 15, 286 FailureThreshold: 1, 287 } 288 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 289 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 290 }) 291 292 /* 293 Release: v1.14 294 Testname: Pod http liveness probe, redirected to a non-local address 295 Description: A Pod is created with liveness probe on http endpoint /redirect with a redirect to http://0.0.0.0/. The http handler on the /redirect should not follow the redirect, but instead treat it as a success and generate an event. 296 */ 297 ginkgo.It("should *not* be restarted with a non-local redirect http liveness probe", func(ctx context.Context) { 298 livenessProbe := &v1.Probe{ 299 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("http://0.0.0.0/"), 8080), 300 InitialDelaySeconds: 15, 301 FailureThreshold: 1, 302 } 303 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 304 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 305 // Expect an event of type "ProbeWarning". 306 expectedEvent := fields.Set{ 307 "involvedObject.kind": "Pod", 308 "involvedObject.name": pod.Name, 309 "involvedObject.namespace": f.Namespace.Name, 310 "reason": events.ContainerProbeWarning, 311 }.AsSelector().String() 312 framework.ExpectNoError(e2eevents.WaitTimeoutForEvent( 313 ctx, f.ClientSet, f.Namespace.Name, expectedEvent, "Probe terminated redirects, Response body: <a href=\"http://0.0.0.0/\">Found</a>.", framework.PodEventTimeout)) 314 }) 315 316 /* 317 Release: v1.16 318 Testname: Pod startup probe restart 319 Description: A Pod is created with a failing startup probe. The Pod MUST be killed and restarted incrementing restart count to 1, even if liveness would succeed. 320 */ 321 ginkgo.It("should be restarted startup probe fails", func(ctx context.Context) { 322 cmd := []string{"/bin/sh", "-c", "sleep 600"} 323 livenessProbe := &v1.Probe{ 324 ProbeHandler: v1.ProbeHandler{ 325 Exec: &v1.ExecAction{ 326 Command: []string{"/bin/true"}, 327 }, 328 }, 329 InitialDelaySeconds: 15, 330 FailureThreshold: 1, 331 } 332 startupProbe := &v1.Probe{ 333 ProbeHandler: v1.ProbeHandler{ 334 Exec: &v1.ExecAction{ 335 Command: []string{"/bin/false"}, 336 }, 337 }, 338 InitialDelaySeconds: 15, 339 FailureThreshold: 3, 340 } 341 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 342 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 343 }) 344 345 /* 346 Release: v1.16 347 Testname: Pod liveness probe delayed (long) by startup probe 348 Description: A Pod is created with failing liveness and startup probes. Liveness probe MUST NOT fail until startup probe expires. 349 */ 350 ginkgo.It("should *not* be restarted by liveness probe because startup probe delays it", func(ctx context.Context) { 351 cmd := []string{"/bin/sh", "-c", "sleep 600"} 352 livenessProbe := &v1.Probe{ 353 ProbeHandler: v1.ProbeHandler{ 354 Exec: &v1.ExecAction{ 355 Command: []string{"/bin/false"}, 356 }, 357 }, 358 InitialDelaySeconds: 15, 359 FailureThreshold: 1, 360 } 361 startupProbe := &v1.Probe{ 362 ProbeHandler: v1.ProbeHandler{ 363 Exec: &v1.ExecAction{ 364 Command: []string{"/bin/false"}, 365 }, 366 }, 367 InitialDelaySeconds: 15, 368 FailureThreshold: 60, 369 } 370 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 371 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 372 }) 373 374 /* 375 Release: v1.16 376 Testname: Pod liveness probe fails after startup success 377 Description: A Pod is created with failing liveness probe and delayed startup probe that uses 'exec' command to cat /temp/health file. The Container is started by creating /tmp/startup after 10 seconds, triggering liveness probe to fail. The Pod MUST now be killed and restarted incrementing restart count to 1. 378 */ 379 ginkgo.It("should be restarted by liveness probe after startup probe enables it", func(ctx context.Context) { 380 cmd := []string{"/bin/sh", "-c", "sleep 10; echo ok >/tmp/startup; sleep 600"} 381 livenessProbe := &v1.Probe{ 382 ProbeHandler: v1.ProbeHandler{ 383 Exec: &v1.ExecAction{ 384 Command: []string{"/bin/false"}, 385 }, 386 }, 387 InitialDelaySeconds: 15, 388 FailureThreshold: 1, 389 } 390 startupProbe := &v1.Probe{ 391 ProbeHandler: v1.ProbeHandler{ 392 Exec: &v1.ExecAction{ 393 Command: []string{"cat", "/tmp/startup"}, 394 }, 395 }, 396 InitialDelaySeconds: 15, 397 FailureThreshold: 60, 398 } 399 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 400 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 401 }) 402 403 /* 404 Release: v1.16 405 Testname: Pod readiness probe, delayed by startup probe 406 Description: A Pod is created with startup and readiness probes. The Container is started by creating /tmp/startup after 45 seconds, delaying the ready state by this amount of time. This is similar to the "Pod readiness probe, with initial delay" test. 407 */ 408 ginkgo.It("should be ready immediately after startupProbe succeeds", func(ctx context.Context) { 409 // Probe workers sleep at Kubelet start for a random time which is at most PeriodSeconds 410 // this test requires both readiness and startup workers running before updating statuses 411 // to avoid flakes, ensure sleep before startup (32s) > readinessProbe.PeriodSeconds 412 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 32; echo ok >/tmp/startup; sleep 600"} 413 readinessProbe := &v1.Probe{ 414 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/health"}), 415 InitialDelaySeconds: 0, 416 PeriodSeconds: 30, 417 } 418 startupProbe := &v1.Probe{ 419 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 420 InitialDelaySeconds: 0, 421 FailureThreshold: 120, 422 PeriodSeconds: 5, 423 } 424 p := podClient.Create(ctx, startupPodSpec(startupProbe, readinessProbe, nil, cmd)) 425 426 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 427 framework.ExpectNoError(err) 428 429 err = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, p.Name, 0, framework.PodStartTimeout) 430 framework.ExpectNoError(err) 431 startedTime := time.Now() 432 433 // We assume the pod became ready when the container became ready. This 434 // is true for a single container pod. 435 err = e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout) 436 framework.ExpectNoError(err) 437 readyTime := time.Now() 438 439 p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) 440 framework.ExpectNoError(err) 441 442 isReady, err := testutils.PodRunningReady(p) 443 framework.ExpectNoError(err) 444 if !isReady { 445 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 446 } 447 448 readyIn := readyTime.Sub(startedTime) 449 framework.Logf("Container started at %v, pod became ready at %v, %v after startupProbe succeeded", startedTime, readyTime, readyIn) 450 if readyIn < 0 { 451 framework.Failf("Pod became ready before startupProbe succeeded") 452 } 453 if readyIn > 25*time.Second { 454 framework.Failf("Pod became ready in %v, more than 25s after startupProbe succeeded. It means that the delay readiness probes were not initiated immediately after startup finished.", readyIn) 455 } 456 }) 457 458 /* 459 Release: v1.21 460 Testname: Set terminationGracePeriodSeconds for livenessProbe 461 Description: A pod with a long terminationGracePeriod is created with a shorter livenessProbe-level terminationGracePeriodSeconds. We confirm the shorter termination period is used. 462 */ 463 f.It("should override timeoutGracePeriodSeconds when LivenessProbe field is set", f.WithNodeConformance(), func(ctx context.Context) { 464 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 465 // probe will fail since pod has no http endpoints 466 shortGracePeriod := int64(5) 467 livenessProbe := &v1.Probe{ 468 ProbeHandler: v1.ProbeHandler{ 469 HTTPGet: &v1.HTTPGetAction{ 470 Path: "/healthz", 471 Port: intstr.FromInt32(8080), 472 }, 473 }, 474 InitialDelaySeconds: 10, 475 FailureThreshold: 1, 476 TerminationGracePeriodSeconds: &shortGracePeriod, 477 } 478 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 479 longGracePeriod := int64(500) 480 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 481 482 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 483 // add defaultObservationTimeout(4min) more for kubelet syncing information 484 // to apiserver 485 RunLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 486 }) 487 488 /* 489 Release: v1.21 490 Testname: Set terminationGracePeriodSeconds for startupProbe 491 Description: A pod with a long terminationGracePeriod is created with a shorter startupProbe-level terminationGracePeriodSeconds. We confirm the shorter termination period is used. 492 */ 493 f.It("should override timeoutGracePeriodSeconds when StartupProbe field is set", f.WithNodeConformance(), func(ctx context.Context) { 494 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 495 // probe will fail since pod has no http endpoints 496 livenessProbe := &v1.Probe{ 497 ProbeHandler: v1.ProbeHandler{ 498 Exec: &v1.ExecAction{ 499 Command: []string{"/bin/true"}, 500 }, 501 }, 502 InitialDelaySeconds: 15, 503 FailureThreshold: 1, 504 } 505 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 506 longGracePeriod := int64(500) 507 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 508 509 shortGracePeriod := int64(5) 510 pod.Spec.Containers[0].StartupProbe = &v1.Probe{ 511 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 512 InitialDelaySeconds: 10, 513 FailureThreshold: 1, 514 TerminationGracePeriodSeconds: &shortGracePeriod, 515 } 516 517 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 518 // add defaultObservationTimeout(4min) more for kubelet syncing information 519 // to apiserver 520 RunLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 521 }) 522 523 /* 524 Release: v1.23 525 Testname: Pod liveness probe, using grpc call, success 526 Description: A Pod is created with liveness probe on grpc service. Liveness probe on this endpoint will not fail. When liveness probe does not fail then the restart count MUST remain zero. 527 */ 528 framework.ConformanceIt("should *not* be restarted with a GRPC liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 529 livenessProbe := &v1.Probe{ 530 ProbeHandler: v1.ProbeHandler{ 531 GRPC: &v1.GRPCAction{ 532 Port: 5000, 533 Service: nil, 534 }, 535 }, 536 InitialDelaySeconds: probeTestInitialDelaySeconds, 537 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 538 FailureThreshold: 1, 539 } 540 541 pod := gRPCServerPodSpec(nil, livenessProbe, "agnhost") 542 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 543 }) 544 545 /* 546 Release: v1.23 547 Testname: Pod liveness probe, using grpc call, failure 548 Description: A Pod is created with liveness probe on grpc service. Liveness probe on this endpoint should fail because of wrong probe port. 549 When liveness probe does fail then the restart count should +1. 550 */ 551 framework.ConformanceIt("should be restarted with a GRPC liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 552 livenessProbe := &v1.Probe{ 553 ProbeHandler: v1.ProbeHandler{ 554 GRPC: &v1.GRPCAction{ 555 Port: 2333, // this port is wrong 556 }, 557 }, 558 InitialDelaySeconds: probeTestInitialDelaySeconds * 4, 559 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 560 FailureThreshold: 1, 561 } 562 pod := gRPCServerPodSpec(nil, livenessProbe, "agnhost") 563 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 564 }) 565 566 ginkgo.It("should mark readiness on pods to false while pod is in progress of terminating when a pod has a readiness probe", func(ctx context.Context) { 567 podName := "probe-test-" + string(uuid.NewUUID()) 568 podClient := e2epod.NewPodClient(f) 569 terminationGracePeriod := int64(30) 570 script := ` 571 _term() { 572 rm -f /tmp/ready 573 sleep 30 574 exit 0 575 } 576 trap _term SIGTERM 577 578 touch /tmp/ready 579 580 while true; do 581 echo \"hello\" 582 sleep 10 583 done 584 ` 585 586 // Create Pod 587 podClient.Create(ctx, &v1.Pod{ 588 ObjectMeta: metav1.ObjectMeta{ 589 Name: podName, 590 }, 591 Spec: v1.PodSpec{ 592 Containers: []v1.Container{ 593 { 594 Image: imageutils.GetE2EImage(imageutils.Agnhost), 595 Name: podName, 596 Command: []string{"/bin/bash"}, 597 Args: []string{"-c", script}, 598 ReadinessProbe: &v1.Probe{ 599 ProbeHandler: v1.ProbeHandler{ 600 Exec: &v1.ExecAction{ 601 Command: []string{"cat", "/tmp/ready"}, 602 }, 603 }, 604 FailureThreshold: 1, 605 InitialDelaySeconds: 5, 606 PeriodSeconds: 2, 607 }, 608 }, 609 }, 610 TerminationGracePeriodSeconds: &terminationGracePeriod, 611 }, 612 }) 613 614 // verify pods are running and ready 615 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) 616 framework.ExpectNoError(err) 617 618 // Shutdown pod. Readiness should change to false 619 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 620 framework.ExpectNoError(err) 621 622 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 623 if !podutil.IsPodReady(pod) { 624 return true, nil 625 } 626 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 627 return false, nil 628 }) 629 framework.ExpectNoError(err) 630 }) 631 632 ginkgo.It("should mark readiness on pods to false and disable liveness probes while pod is in progress of terminating", func(ctx context.Context) { 633 podName := "probe-test-" + string(uuid.NewUUID()) 634 podClient := e2epod.NewPodClient(f) 635 terminationGracePeriod := int64(30) 636 script := ` 637 _term() { 638 rm -f /tmp/ready 639 rm -f /tmp/liveness 640 sleep 20 641 exit 0 642 } 643 trap _term SIGTERM 644 645 touch /tmp/ready 646 touch /tmp/liveness 647 648 while true; do 649 echo \"hello\" 650 sleep 10 651 done 652 ` 653 654 // Create Pod 655 podClient.Create(ctx, &v1.Pod{ 656 ObjectMeta: metav1.ObjectMeta{ 657 Name: podName, 658 }, 659 Spec: v1.PodSpec{ 660 Containers: []v1.Container{ 661 { 662 Image: imageutils.GetE2EImage(imageutils.Agnhost), 663 Name: podName, 664 Command: []string{"/bin/bash"}, 665 Args: []string{"-c", script}, 666 ReadinessProbe: &v1.Probe{ 667 ProbeHandler: v1.ProbeHandler{ 668 Exec: &v1.ExecAction{ 669 Command: []string{"cat", "/tmp/ready"}, 670 }, 671 }, 672 FailureThreshold: 1, 673 // delay startup to make sure the script script has 674 // time to create the ready+liveness files 675 InitialDelaySeconds: 5, 676 PeriodSeconds: 2, 677 }, 678 LivenessProbe: &v1.Probe{ 679 ProbeHandler: v1.ProbeHandler{ 680 Exec: &v1.ExecAction{ 681 Command: []string{"cat", "/tmp/liveness"}, 682 }, 683 }, 684 FailureThreshold: 1, 685 // delay startup to make sure the script script has 686 // time to create the ready+liveness files 687 InitialDelaySeconds: 5, 688 PeriodSeconds: 1, 689 }, 690 }, 691 }, 692 TerminationGracePeriodSeconds: &terminationGracePeriod, 693 }, 694 }) 695 696 // verify pods are running and ready 697 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) 698 framework.ExpectNoError(err) 699 700 // Shutdown pod. Readiness should change to false 701 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 702 framework.ExpectNoError(err) 703 704 // Wait for pod to go unready 705 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 706 if !podutil.IsPodReady(pod) { 707 return true, nil 708 } 709 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 710 return false, nil 711 }) 712 framework.ExpectNoError(err) 713 714 // Verify there are zero liveness failures since they are turned off 715 // during pod termination 716 gomega.Consistently(ctx, func(ctx context.Context) (bool, error) { 717 items, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 718 framework.ExpectNoError(err) 719 for _, event := range items.Items { 720 // Search only for the pod we are interested in 721 if event.InvolvedObject.Name != podName { 722 continue 723 } 724 if strings.Contains(event.Message, "failed liveness probe") { 725 return true, errors.New("should not see liveness probe failures") 726 } 727 } 728 return false, nil 729 }, 1*time.Minute, framework.Poll).ShouldNot(gomega.BeTrue(), "should not see liveness probes") 730 }) 731 }) 732 733 var _ = SIGDescribe(nodefeature.SidecarContainers, feature.SidecarContainers, "Probing restartable init container", func() { 734 f := framework.NewDefaultFramework("container-probe") 735 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 736 var podClient *e2epod.PodClient 737 probe := webserverProbeBuilder{} 738 739 ginkgo.BeforeEach(func() { 740 podClient = e2epod.NewPodClient(f) 741 }) 742 743 /* 744 Release: v1.28 745 Testname: Pod restartable init container readiness probe, with initial delay 746 Description: Create a Pod that is configured with a initial delay set on 747 the readiness probe. Check the Pod Start time to compare to the initial 748 delay. The Pod MUST be ready only after the specified initial delay. 749 */ 750 ginkgo.It("with readiness probe should not be ready before initial delay and never restart", func(ctx context.Context) { 751 containerName := "test-webserver" 752 p := podClient.Create(ctx, testWebServerSidecarPodSpec(probe.withInitialDelay().build(), nil, containerName, 80)) 753 framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout)) 754 755 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 756 framework.ExpectNoError(err) 757 isReady, err := testutils.PodRunningReady(p) 758 framework.ExpectNoError(err) 759 if !isReady { 760 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 761 } 762 763 // We assume the pod became ready when the container became ready. This 764 // is true for a single container pod. 765 readyTime, err := GetTransitionTimeForReadyCondition(p) 766 framework.ExpectNoError(err) 767 startedTime, err := GetContainerStartedTime(p, containerName) 768 framework.ExpectNoError(err) 769 770 framework.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) 771 initialDelay := probeTestInitialDelaySeconds * time.Second 772 if readyTime.Sub(startedTime) < initialDelay { 773 framework.Failf("Pod became ready before it's %v initial delay", initialDelay) 774 } 775 776 restartCount := getRestartCount(p) 777 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 778 }) 779 780 /* 781 Release: v1.28 782 Testname: Pod restartable init container readiness probe, failure 783 Description: Create a Pod with a readiness probe that fails consistently. 784 When this Pod is created, then the Pod MUST never be ready, never be 785 running and restart count MUST be zero. 786 */ 787 ginkgo.It("with readiness probe that fails should never be ready and never restart", func(ctx context.Context) { 788 p := podClient.Create(ctx, testWebServerSidecarPodSpec(probe.withFailing().build(), nil, "test-webserver", 80)) 789 gomega.Consistently(ctx, func() (bool, error) { 790 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 791 if err != nil { 792 return false, err 793 } 794 return podutil.IsPodReady(p), nil 795 }, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(), "pod should not be ready") 796 797 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 798 framework.ExpectNoError(err) 799 800 isReady, _ := testutils.PodRunningReady(p) 801 if isReady { 802 framework.Failf("pod %s/%s should be not ready", f.Namespace.Name, p.Name) 803 } 804 805 restartCount := getRestartCount(p) 806 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 807 }) 808 809 /* 810 Release: v1.28 811 Testname: Pod restartable init container liveness probe, using local file, restart 812 Description: Create a Pod with liveness probe that uses ExecAction handler 813 to cat /temp/health file. The Container deletes the file /temp/health after 814 10 second, triggering liveness probe to fail. The Pod MUST now be killed 815 and restarted incrementing restart count to 1. 816 */ 817 ginkgo.It("should be restarted with a exec \"cat /tmp/health\" liveness probe", func(ctx context.Context) { 818 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 10; rm -rf /tmp/health; sleep 600"} 819 livenessProbe := &v1.Probe{ 820 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 821 InitialDelaySeconds: 15, 822 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 823 FailureThreshold: 1, 824 } 825 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 826 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 827 }) 828 829 /* 830 Release: v1.28 831 Testname: Pod restartable init container liveness probe, using local file, no restart 832 Description: Pod is created with liveness probe that uses 'exec' command 833 to cat /temp/health file. Liveness probe MUST not fail to check health and 834 the restart count should remain 0. 835 */ 836 ginkgo.It("should *not* be restarted with a exec \"cat /tmp/health\" liveness probe", func(ctx context.Context) { 837 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 600"} 838 livenessProbe := &v1.Probe{ 839 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 840 InitialDelaySeconds: 15, 841 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 842 FailureThreshold: 1, 843 } 844 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 845 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 846 }) 847 848 /* 849 Release: v1.28 850 Testname: Pod restartable init container liveness probe, using http endpoint, restart 851 Description: A Pod is created with liveness probe on http endpoint 852 /healthz. The http handler on the /healthz will return a http error after 853 10 seconds since the Pod is started. This MUST result in liveness check 854 failure. The Pod MUST now be killed and restarted incrementing restart 855 count to 1. 856 */ 857 ginkgo.It("should be restarted with a /healthz http liveness probe", func(ctx context.Context) { 858 livenessProbe := &v1.Probe{ 859 ProbeHandler: httpGetHandler("/healthz", 8080), 860 InitialDelaySeconds: 15, 861 TimeoutSeconds: 5, 862 FailureThreshold: 1, 863 } 864 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 865 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 866 }) 867 868 /* 869 Release: v1.28 870 Testname: Pod restartable init container liveness probe, using tcp socket, no restart 871 Description: A Pod is created with liveness probe on tcp socket 8080. The 872 http handler on port 8080 will return http errors after 10 seconds, but the 873 socket will remain open. Liveness probe MUST not fail to check health and 874 the restart count should remain 0. 875 */ 876 ginkgo.It("should *not* be restarted with a tcp:8080 liveness probe", func(ctx context.Context) { 877 livenessProbe := &v1.Probe{ 878 ProbeHandler: tcpSocketHandler(8080), 879 InitialDelaySeconds: 15, 880 TimeoutSeconds: 5, 881 FailureThreshold: 1, 882 } 883 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 884 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 885 }) 886 887 /* 888 Release: v1.28 889 Testname: Pod restartable init container liveness probe, using http endpoint, multiple restarts (slow) 890 Description: A Pod is created with liveness probe on http endpoint 891 /healthz. The http handler on the /healthz will return a http error after 892 10 seconds since the Pod is started. This MUST result in liveness check 893 failure. The Pod MUST now be killed and restarted incrementing restart 894 count to 1. The liveness probe must fail again after restart once the http 895 handler for /healthz enpoind on the Pod returns an http error after 10 896 seconds from the start. Restart counts MUST increment every time health 897 check fails, measure up to 5 restart. 898 */ 899 ginkgo.It("should have monotonically increasing restart count", func(ctx context.Context) { 900 livenessProbe := &v1.Probe{ 901 ProbeHandler: httpGetHandler("/healthz", 8080), 902 InitialDelaySeconds: 5, 903 FailureThreshold: 1, 904 } 905 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 906 // ~2 minutes backoff timeouts + 4 minutes defaultObservationTimeout + 2 minutes for each pod restart 907 RunSidecarLivenessTest(ctx, f, pod, 5, 2*time.Minute+defaultObservationTimeout+4*2*time.Minute) 908 }) 909 910 /* 911 Release: v1.28 912 Testname: Pod restartable init container liveness probe, using http endpoint, failure 913 Description: A Pod is created with liveness probe on http endpoint '/'. 914 Liveness probe on this endpoint will not fail. When liveness probe does not 915 fail then the restart count MUST remain zero. 916 */ 917 ginkgo.It("should *not* be restarted with a /healthz http liveness probe", func(ctx context.Context) { 918 livenessProbe := &v1.Probe{ 919 ProbeHandler: httpGetHandler("/", 80), 920 InitialDelaySeconds: 15, 921 TimeoutSeconds: 5, 922 FailureThreshold: 5, // to accommodate nodes which are slow in bringing up containers. 923 } 924 pod := testWebServerSidecarPodSpec(nil, livenessProbe, "test-webserver", 80) 925 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 926 }) 927 928 /* 929 Release: v1.28 930 Testname: Pod restartable init container liveness probe, container exec timeout, restart 931 Description: A Pod is created with liveness probe with a Exec action on the 932 Pod. If the liveness probe call does not return within the timeout 933 specified, liveness probe MUST restart the Pod. 934 */ 935 ginkgo.It("should be restarted with an exec liveness probe with timeout [MinimumKubeletVersion:1.20]", func(ctx context.Context) { 936 cmd := []string{"/bin/sh", "-c", "sleep 600"} 937 livenessProbe := &v1.Probe{ 938 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 939 InitialDelaySeconds: 15, 940 TimeoutSeconds: 1, 941 FailureThreshold: 1, 942 } 943 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 944 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 945 }) 946 947 /* 948 Release: v1.28 949 Testname: Pod restartable init container readiness probe, container exec timeout, not ready 950 Description: A Pod is created with readiness probe with a Exec action on 951 the Pod. If the readiness probe call does not return within the timeout 952 specified, readiness probe MUST not be Ready. 953 */ 954 ginkgo.It("should not be ready with an exec readiness probe timeout [MinimumKubeletVersion:1.20]", func(ctx context.Context) { 955 cmd := []string{"/bin/sh", "-c", "sleep 600"} 956 readinessProbe := &v1.Probe{ 957 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 958 InitialDelaySeconds: 15, 959 TimeoutSeconds: 1, 960 FailureThreshold: 1, 961 } 962 pod := busyBoxSidecarPodSpec(readinessProbe, nil, cmd) 963 runReadinessFailTest(ctx, f, pod, time.Minute, false) 964 }) 965 966 /* 967 Release: v1.28 968 Testname: Pod restartalbe init container liveness probe, container exec timeout, restart 969 Description: A Pod is created with liveness probe with a Exec action on the 970 Pod. If the liveness probe call does not return within the timeout 971 specified, liveness probe MUST restart the Pod. When ExecProbeTimeout 972 feature gate is disabled and cluster is using dockershim, the timeout is 973 ignored BUT a failing liveness probe MUST restart the Pod. 974 */ 975 ginkgo.It("should be restarted with a failing exec liveness probe that took longer than the timeout", func(ctx context.Context) { 976 cmd := []string{"/bin/sh", "-c", "sleep 600"} 977 livenessProbe := &v1.Probe{ 978 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10 & exit 1"}), 979 InitialDelaySeconds: 15, 980 TimeoutSeconds: 1, 981 FailureThreshold: 1, 982 } 983 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 984 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 985 }) 986 987 /* 988 Release: v1.28 989 Testname: Pod restartable init container http liveness probe, redirected to a local address 990 Description: A Pod is created with liveness probe on http endpoint 991 /redirect?loc=healthz. The http handler on the /redirect will redirect to 992 the /healthz endpoint, which will return a http error after 10 seconds 993 since the Pod is started. This MUST result in liveness check failure. The 994 Pod MUST now be killed and restarted incrementing restart count to 1. 995 */ 996 ginkgo.It("should be restarted with a local redirect http liveness probe", func(ctx context.Context) { 997 livenessProbe := &v1.Probe{ 998 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("/healthz"), 8080), 999 InitialDelaySeconds: 15, 1000 FailureThreshold: 1, 1001 } 1002 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 1003 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1004 }) 1005 1006 /* 1007 Release: v1.28 1008 Testname: Pod restartable init container http liveness probe, redirected to a non-local address 1009 Description: A Pod is created with liveness probe on http endpoint 1010 /redirect with a redirect to http://0.0.0.0/. The http handler on the 1011 /redirect should not follow the redirect, but instead treat it as a success 1012 and generate an event. 1013 */ 1014 ginkgo.It("should *not* be restarted with a non-local redirect http liveness probe", func(ctx context.Context) { 1015 livenessProbe := &v1.Probe{ 1016 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("http://0.0.0.0/"), 8080), 1017 InitialDelaySeconds: 15, 1018 FailureThreshold: 1, 1019 } 1020 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 1021 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1022 // Expect an event of type "ProbeWarning". 1023 expectedEvent := fields.Set{ 1024 "involvedObject.kind": "Pod", 1025 "involvedObject.name": pod.Name, 1026 "involvedObject.namespace": f.Namespace.Name, 1027 "reason": events.ContainerProbeWarning, 1028 }.AsSelector().String() 1029 framework.ExpectNoError(e2eevents.WaitTimeoutForEvent( 1030 ctx, f.ClientSet, f.Namespace.Name, expectedEvent, "Probe terminated redirects, Response body: <a href=\"http://0.0.0.0/\">Found</a>.", framework.PodEventTimeout)) 1031 }) 1032 1033 /* 1034 Release: v1.28 1035 Testname: Pod restartable init container startup probe restart 1036 Description: A Pod is created with a failing startup probe. The Pod MUST be 1037 killed and restarted incrementing restart count to 1, even if liveness 1038 would succeed. 1039 */ 1040 ginkgo.It("should be restarted startup probe fails", func(ctx context.Context) { 1041 cmd := []string{"/bin/sh", "-c", "sleep 600"} 1042 livenessProbe := &v1.Probe{ 1043 ProbeHandler: v1.ProbeHandler{ 1044 Exec: &v1.ExecAction{ 1045 Command: []string{"/bin/true"}, 1046 }, 1047 }, 1048 InitialDelaySeconds: 15, 1049 FailureThreshold: 1, 1050 } 1051 startupProbe := &v1.Probe{ 1052 ProbeHandler: v1.ProbeHandler{ 1053 Exec: &v1.ExecAction{ 1054 Command: []string{"/bin/false"}, 1055 }, 1056 }, 1057 InitialDelaySeconds: 15, 1058 FailureThreshold: 3, 1059 } 1060 pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd) 1061 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1062 }) 1063 1064 /* 1065 Release: v1.28 1066 Testname: Pod restartable init container liveness probe delayed (long) by startup probe 1067 Description: A Pod is created with failing liveness and startup probes. 1068 Liveness probe MUST NOT fail until startup probe expires. 1069 */ 1070 ginkgo.It("should *not* be restarted by liveness probe because startup probe delays it", func(ctx context.Context) { 1071 cmd := []string{"/bin/sh", "-c", "sleep 600"} 1072 livenessProbe := &v1.Probe{ 1073 ProbeHandler: v1.ProbeHandler{ 1074 Exec: &v1.ExecAction{ 1075 Command: []string{"/bin/false"}, 1076 }, 1077 }, 1078 InitialDelaySeconds: 15, 1079 FailureThreshold: 1, 1080 } 1081 startupProbe := &v1.Probe{ 1082 ProbeHandler: v1.ProbeHandler{ 1083 Exec: &v1.ExecAction{ 1084 Command: []string{"/bin/false"}, 1085 }, 1086 }, 1087 InitialDelaySeconds: 15, 1088 FailureThreshold: 60, 1089 } 1090 pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd) 1091 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1092 }) 1093 1094 /* 1095 Release: v1.28 1096 Testname: Pod restartable init container liveness probe fails after startup success 1097 Description: A Pod is created with failing liveness probe and delayed 1098 startup probe that uses 'exec' command to cat /tmp/health file. The 1099 Container is started by creating /tmp/startup after 10 seconds, triggering 1100 liveness probe to fail. The Pod MUST not be killed and restarted 1101 incrementing restart count to 1. 1102 */ 1103 ginkgo.It("should be restarted by liveness probe after startup probe enables it", func(ctx context.Context) { 1104 cmd := []string{"/bin/sh", "-c", "sleep 10; echo ok >/tmp/startup; sleep 600"} 1105 livenessProbe := &v1.Probe{ 1106 ProbeHandler: v1.ProbeHandler{ 1107 Exec: &v1.ExecAction{ 1108 Command: []string{"/bin/false"}, 1109 }, 1110 }, 1111 InitialDelaySeconds: 15, 1112 FailureThreshold: 1, 1113 } 1114 startupProbe := &v1.Probe{ 1115 ProbeHandler: v1.ProbeHandler{ 1116 Exec: &v1.ExecAction{ 1117 Command: []string{"cat", "/tmp/startup"}, 1118 }, 1119 }, 1120 InitialDelaySeconds: 15, 1121 FailureThreshold: 60, 1122 } 1123 pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd) 1124 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1125 }) 1126 1127 /* 1128 Release: v1.28 1129 Testname: Pod restartable init container readiness probe, delayed by startup probe 1130 Description: A Pod is created with startup and readiness probes. The 1131 Container is started by creating /tmp/startup after 45 seconds, delaying 1132 the ready state by this amount of time. This is similar to the "Pod 1133 readiness probe, with initial delay" test. 1134 */ 1135 ginkgo.It("should be ready immediately after startupProbe succeeds", func(ctx context.Context) { 1136 // Probe workers sleep at Kubelet start for a random time which is at most PeriodSeconds 1137 // this test requires both readiness and startup workers running before updating statuses 1138 // to avoid flakes, ensure sleep before startup (32s) > readinessProbe.PeriodSeconds 1139 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 32; echo ok >/tmp/startup; sleep 600"} 1140 readinessProbe := &v1.Probe{ 1141 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/health"}), 1142 InitialDelaySeconds: 0, 1143 PeriodSeconds: 30, 1144 } 1145 startupProbe := &v1.Probe{ 1146 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 1147 InitialDelaySeconds: 0, 1148 FailureThreshold: 120, 1149 PeriodSeconds: 5, 1150 } 1151 p := podClient.Create(ctx, startupSidecarPodSpec(startupProbe, readinessProbe, nil, cmd)) 1152 1153 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 1154 framework.ExpectNoError(err) 1155 1156 err = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, p.Name, 0, framework.PodStartTimeout) 1157 framework.ExpectNoError(err) 1158 startedTime := time.Now() 1159 1160 // We assume the pod became ready when the container became ready. This 1161 // is true for a single container pod. 1162 err = e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout) 1163 framework.ExpectNoError(err) 1164 readyTime := time.Now() 1165 1166 p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) 1167 framework.ExpectNoError(err) 1168 1169 isReady, err := testutils.PodRunningReady(p) 1170 framework.ExpectNoError(err) 1171 if !isReady { 1172 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 1173 } 1174 1175 readyIn := readyTime.Sub(startedTime) 1176 framework.Logf("Container started at %v, pod became ready at %v, %v after startupProbe succeeded", startedTime, readyTime, readyIn) 1177 if readyIn < 0 { 1178 framework.Failf("Pod became ready before startupProbe succeeded") 1179 } 1180 if readyIn > 25*time.Second { 1181 framework.Failf("Pod became ready in %v, more than 25s after startupProbe succeeded. It means that the delay readiness probes were not initiated immediately after startup finished.", readyIn) 1182 } 1183 }) 1184 1185 // TODO: Update tests after implementing termination ordering of restartable 1186 // init containers 1187 /* 1188 Release: v1.28 1189 Testname: Set terminationGracePeriodSeconds for livenessProbe of restartable init container 1190 Description: A pod with a long terminationGracePeriod is created with a 1191 shorter livenessProbe-level terminationGracePeriodSeconds. We confirm the 1192 shorter termination period is used. 1193 */ 1194 ginkgo.It("should override timeoutGracePeriodSeconds when LivenessProbe field is set", func(ctx context.Context) { 1195 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 1196 // probe will fail since pod has no http endpoints 1197 shortGracePeriod := int64(5) 1198 livenessProbe := &v1.Probe{ 1199 ProbeHandler: v1.ProbeHandler{ 1200 HTTPGet: &v1.HTTPGetAction{ 1201 Path: "/healthz", 1202 Port: intstr.FromInt32(8080), 1203 }, 1204 }, 1205 InitialDelaySeconds: 10, 1206 FailureThreshold: 1, 1207 TerminationGracePeriodSeconds: &shortGracePeriod, 1208 } 1209 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 1210 longGracePeriod := int64(500) 1211 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 1212 1213 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 1214 // add defaultObservationTimeout(4min) more for kubelet syncing information 1215 // to apiserver 1216 RunSidecarLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 1217 }) 1218 1219 /* 1220 Release: v1.28 1221 Testname: Set terminationGracePeriodSeconds for startupProbe of restartable init container 1222 Description: A pod with a long terminationGracePeriod is created with a 1223 shorter startupProbe-level terminationGracePeriodSeconds. We confirm the 1224 shorter termination period is used. 1225 */ 1226 ginkgo.It("should override timeoutGracePeriodSeconds when StartupProbe field is set", func(ctx context.Context) { 1227 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 1228 // startup probe will fail since pod will sleep for 1000s before becoming ready 1229 livenessProbe := &v1.Probe{ 1230 ProbeHandler: v1.ProbeHandler{ 1231 Exec: &v1.ExecAction{ 1232 Command: []string{"/bin/true"}, 1233 }, 1234 }, 1235 InitialDelaySeconds: 15, 1236 FailureThreshold: 1, 1237 } 1238 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 1239 longGracePeriod := int64(500) 1240 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 1241 1242 shortGracePeriod := int64(5) 1243 pod.Spec.InitContainers[0].StartupProbe = &v1.Probe{ 1244 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 1245 InitialDelaySeconds: 10, 1246 FailureThreshold: 1, 1247 TerminationGracePeriodSeconds: &shortGracePeriod, 1248 } 1249 1250 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 1251 // add defaultObservationTimeout(4min) more for kubelet syncing information 1252 // to apiserver 1253 RunSidecarLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 1254 }) 1255 1256 /* 1257 Release: v1.28 1258 Testname: Pod restartable init container liveness probe, using grpc call, success 1259 Description: A Pod is created with liveness probe on grpc service. Liveness 1260 probe on this endpoint will not fail. When liveness probe does not fail 1261 then the restart count MUST remain zero. 1262 */ 1263 ginkgo.It("should *not* be restarted with a GRPC liveness probe", func(ctx context.Context) { 1264 livenessProbe := &v1.Probe{ 1265 ProbeHandler: v1.ProbeHandler{ 1266 GRPC: &v1.GRPCAction{ 1267 Port: 5000, 1268 Service: nil, 1269 }, 1270 }, 1271 InitialDelaySeconds: probeTestInitialDelaySeconds, 1272 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 1273 FailureThreshold: 1, 1274 } 1275 1276 pod := gRPCServerSidecarPodSpec(nil, livenessProbe, "agnhost") 1277 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1278 }) 1279 1280 /* 1281 Release: v1.28 1282 Testname: Pod restartable init container liveness probe, using grpc call, failure 1283 Description: A Pod is created with liveness probe on grpc service. 1284 Liveness probe on this endpoint should fail because of wrong probe port. 1285 When liveness probe does fail then the restart count should +1. 1286 */ 1287 ginkgo.It("should be restarted with a GRPC liveness probe", func(ctx context.Context) { 1288 livenessProbe := &v1.Probe{ 1289 ProbeHandler: v1.ProbeHandler{ 1290 GRPC: &v1.GRPCAction{ 1291 Port: 2333, // this port is wrong 1292 }, 1293 }, 1294 InitialDelaySeconds: probeTestInitialDelaySeconds * 4, 1295 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 1296 FailureThreshold: 1, 1297 } 1298 pod := gRPCServerSidecarPodSpec(nil, livenessProbe, "agnhost") 1299 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1300 }) 1301 1302 ginkgo.It("should mark readiness on pods to false while pod is in progress of terminating when a pod has a readiness probe", func(ctx context.Context) { 1303 podName := "probe-test-" + string(uuid.NewUUID()) 1304 podClient := e2epod.NewPodClient(f) 1305 terminationGracePeriod := int64(30) 1306 script := ` 1307 _term() { 1308 rm -f /tmp/ready 1309 sleep 30 1310 exit 0 1311 } 1312 trap _term SIGTERM 1313 1314 touch /tmp/ready 1315 1316 while true; do 1317 echo \"hello\" 1318 sleep 10 1319 done 1320 ` 1321 1322 // Create Pod 1323 podClient.Create(ctx, &v1.Pod{ 1324 ObjectMeta: metav1.ObjectMeta{ 1325 Name: podName, 1326 }, 1327 Spec: v1.PodSpec{ 1328 InitContainers: []v1.Container{ 1329 { 1330 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1331 Name: podName, 1332 Command: []string{"/bin/bash"}, 1333 Args: []string{"-c", script}, 1334 ReadinessProbe: &v1.Probe{ 1335 ProbeHandler: v1.ProbeHandler{ 1336 Exec: &v1.ExecAction{ 1337 Command: []string{"cat", "/tmp/ready"}, 1338 }, 1339 }, 1340 FailureThreshold: 1, 1341 InitialDelaySeconds: 5, 1342 PeriodSeconds: 2, 1343 }, 1344 RestartPolicy: func() *v1.ContainerRestartPolicy { 1345 restartPolicy := v1.ContainerRestartPolicyAlways 1346 return &restartPolicy 1347 }(), 1348 }, 1349 }, 1350 Containers: []v1.Container{ 1351 { 1352 Name: "main", 1353 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1354 Args: []string{"pause"}, 1355 }, 1356 }, 1357 TerminationGracePeriodSeconds: &terminationGracePeriod, 1358 }, 1359 }) 1360 1361 // verify pods are running and ready 1362 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) 1363 framework.ExpectNoError(err) 1364 1365 // Shutdown pod. Readiness should change to false 1366 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 1367 framework.ExpectNoError(err) 1368 1369 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 1370 if !podutil.IsPodReady(pod) { 1371 return true, nil 1372 } 1373 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 1374 return false, nil 1375 }) 1376 framework.ExpectNoError(err) 1377 }) 1378 1379 ginkgo.It("should mark readiness on pods to false and disable liveness probes while pod is in progress of terminating", func(ctx context.Context) { 1380 podName := "probe-test-" + string(uuid.NewUUID()) 1381 podClient := e2epod.NewPodClient(f) 1382 terminationGracePeriod := int64(30) 1383 script := ` 1384 _term() { 1385 rm -f /tmp/ready 1386 rm -f /tmp/liveness 1387 sleep 20 1388 exit 0 1389 } 1390 trap _term SIGTERM 1391 1392 touch /tmp/ready 1393 touch /tmp/liveness 1394 1395 while true; do 1396 echo \"hello\" 1397 sleep 10 1398 done 1399 ` 1400 1401 // Create Pod 1402 podClient.Create(ctx, &v1.Pod{ 1403 ObjectMeta: metav1.ObjectMeta{ 1404 Name: podName, 1405 }, 1406 Spec: v1.PodSpec{ 1407 InitContainers: []v1.Container{ 1408 { 1409 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1410 Name: podName, 1411 Command: []string{"/bin/bash"}, 1412 Args: []string{"-c", script}, 1413 ReadinessProbe: &v1.Probe{ 1414 ProbeHandler: v1.ProbeHandler{ 1415 Exec: &v1.ExecAction{ 1416 Command: []string{"cat", "/tmp/ready"}, 1417 }, 1418 }, 1419 FailureThreshold: 1, 1420 // delay startup to make sure the script script has 1421 // time to create the ready+liveness files 1422 InitialDelaySeconds: 5, 1423 PeriodSeconds: 2, 1424 }, 1425 LivenessProbe: &v1.Probe{ 1426 ProbeHandler: v1.ProbeHandler{ 1427 Exec: &v1.ExecAction{ 1428 Command: []string{"cat", "/tmp/liveness"}, 1429 }, 1430 }, 1431 FailureThreshold: 1, 1432 // delay startup to make sure the script script has 1433 // time to create the ready+liveness files 1434 InitialDelaySeconds: 5, 1435 PeriodSeconds: 1, 1436 }, 1437 RestartPolicy: func() *v1.ContainerRestartPolicy { 1438 restartPolicy := v1.ContainerRestartPolicyAlways 1439 return &restartPolicy 1440 }(), 1441 }, 1442 }, 1443 Containers: []v1.Container{ 1444 { 1445 Name: "main", 1446 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1447 Args: []string{"pause"}, 1448 }, 1449 }, 1450 TerminationGracePeriodSeconds: &terminationGracePeriod, 1451 }, 1452 }) 1453 1454 // verify pods are running and ready 1455 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, f.Timeouts.PodStart) 1456 framework.ExpectNoError(err) 1457 1458 // Shutdown pod. Readiness should change to false 1459 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 1460 framework.ExpectNoError(err) 1461 1462 // Wait for pod to go unready 1463 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 1464 if !podutil.IsPodReady(pod) { 1465 return true, nil 1466 } 1467 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 1468 return false, nil 1469 }) 1470 framework.ExpectNoError(err) 1471 1472 // Verify there are zero liveness failures since they are turned off 1473 // during pod termination 1474 gomega.Consistently(ctx, func(ctx context.Context) (bool, error) { 1475 items, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 1476 framework.ExpectNoError(err) 1477 for _, event := range items.Items { 1478 // Search only for the pod we are interested in 1479 if event.InvolvedObject.Name != podName { 1480 continue 1481 } 1482 if strings.Contains(event.Message, "failed liveness probe") { 1483 return true, errors.New("should not see liveness probe failures") 1484 } 1485 } 1486 return false, nil 1487 }, 1*time.Minute, framework.Poll).ShouldNot(gomega.BeTrue(), "should not see liveness probes") 1488 }) 1489 }) 1490 1491 // waitForPodStatusByInformer waits pod status change by informer 1492 func waitForPodStatusByInformer(ctx context.Context, c clientset.Interface, podNamespace, podName string, timeout time.Duration, condition func(pod *v1.Pod) (bool, error)) error { 1493 // TODO (pohly): rewrite with gomega.Eventually to get intermediate progress reports. 1494 stopCh := make(chan struct{}) 1495 checkPodStatusFunc := func(pod *v1.Pod) { 1496 if ok, _ := condition(pod); ok { 1497 close(stopCh) 1498 } 1499 } 1500 controller := newInformerWatchPod(ctx, c, podNamespace, podName, checkPodStatusFunc) 1501 go controller.Run(stopCh) 1502 after := time.After(timeout) 1503 select { 1504 case <-stopCh: 1505 return nil 1506 case <-ctx.Done(): 1507 close(stopCh) 1508 return fmt.Errorf("timeout to wait pod status ready") 1509 case <-after: 1510 close(stopCh) 1511 return fmt.Errorf("timeout to wait pod status ready") 1512 } 1513 } 1514 1515 // newInformerWatchPod creates a informer for given pod 1516 func newInformerWatchPod(ctx context.Context, c clientset.Interface, podNamespace, podName string, checkPodStatusFunc func(p *v1.Pod)) cache.Controller { 1517 _, controller := cache.NewInformer( 1518 &cache.ListWatch{ 1519 ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 1520 options.FieldSelector = fields.SelectorFromSet(fields.Set{"metadata.name": podName}).String() 1521 obj, err := c.CoreV1().Pods(podNamespace).List(ctx, options) 1522 return runtime.Object(obj), err 1523 }, 1524 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 1525 options.FieldSelector = fields.SelectorFromSet(fields.Set{"metadata.name": podName}).String() 1526 return c.CoreV1().Pods(podNamespace).Watch(ctx, options) 1527 }, 1528 }, 1529 &v1.Pod{}, 1530 0, 1531 cache.ResourceEventHandlerFuncs{ 1532 AddFunc: func(obj interface{}) { 1533 p, ok := obj.(*v1.Pod) 1534 if ok { 1535 checkPodStatusFunc(p) 1536 } 1537 }, 1538 UpdateFunc: func(oldObj, newObj interface{}) { 1539 p, ok := newObj.(*v1.Pod) 1540 if ok { 1541 checkPodStatusFunc(p) 1542 } 1543 }, 1544 DeleteFunc: func(obj interface{}) { 1545 p, ok := obj.(*v1.Pod) 1546 if ok { 1547 checkPodStatusFunc(p) 1548 } 1549 }, 1550 }, 1551 ) 1552 return controller 1553 } 1554 1555 // GetContainerStartedTime returns the time when the given container started and error if any 1556 func GetContainerStartedTime(p *v1.Pod, containerName string) (time.Time, error) { 1557 for _, status := range append(p.Status.InitContainerStatuses, p.Status.ContainerStatuses...) { 1558 if status.Name != containerName { 1559 continue 1560 } 1561 if status.State.Running == nil { 1562 return time.Time{}, fmt.Errorf("container is not running") 1563 } 1564 return status.State.Running.StartedAt.Time, nil 1565 } 1566 return time.Time{}, fmt.Errorf("cannot find container named %q", containerName) 1567 } 1568 1569 // GetTransitionTimeForReadyCondition returns the time when the given pod became ready and error if any 1570 func GetTransitionTimeForReadyCondition(p *v1.Pod) (time.Time, error) { 1571 for _, cond := range p.Status.Conditions { 1572 if cond.Type == v1.PodReady { 1573 return cond.LastTransitionTime.Time, nil 1574 } 1575 } 1576 return time.Time{}, fmt.Errorf("no ready condition can be found for pod") 1577 } 1578 1579 func getRestartCount(p *v1.Pod) int { 1580 count := 0 1581 for _, containerStatus := range append(p.Status.InitContainerStatuses, p.Status.ContainerStatuses...) { 1582 count += int(containerStatus.RestartCount) 1583 } 1584 return count 1585 } 1586 1587 func testWebServerPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string, port int) *v1.Pod { 1588 return &v1.Pod{ 1589 ObjectMeta: metav1.ObjectMeta{Name: "test-webserver-" + string(uuid.NewUUID())}, 1590 Spec: v1.PodSpec{ 1591 Containers: []v1.Container{ 1592 { 1593 Name: containerName, 1594 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1595 Args: []string{"test-webserver"}, 1596 Ports: []v1.ContainerPort{{ContainerPort: int32(port)}}, 1597 LivenessProbe: livenessProbe, 1598 ReadinessProbe: readinessProbe, 1599 }, 1600 }, 1601 }, 1602 } 1603 } 1604 1605 func busyBoxPodSpec(readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1606 return &v1.Pod{ 1607 ObjectMeta: metav1.ObjectMeta{ 1608 Name: "busybox-" + string(uuid.NewUUID()), 1609 Labels: map[string]string{"test": "liveness"}, 1610 }, 1611 Spec: v1.PodSpec{ 1612 Containers: []v1.Container{ 1613 { 1614 Name: "busybox", 1615 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1616 Command: cmd, 1617 LivenessProbe: livenessProbe, 1618 ReadinessProbe: readinessProbe, 1619 }, 1620 }, 1621 }, 1622 } 1623 } 1624 1625 func livenessPodSpec(namespace string, readinessProbe, livenessProbe *v1.Probe) *v1.Pod { 1626 pod := e2epod.NewAgnhostPod(namespace, "liveness-"+string(uuid.NewUUID()), nil, nil, nil, "liveness") 1627 pod.ObjectMeta.Labels = map[string]string{"test": "liveness"} 1628 pod.Spec.Containers[0].LivenessProbe = livenessProbe 1629 pod.Spec.Containers[0].ReadinessProbe = readinessProbe 1630 return pod 1631 } 1632 1633 func startupPodSpec(startupProbe, readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1634 return &v1.Pod{ 1635 ObjectMeta: metav1.ObjectMeta{ 1636 Name: "startup-" + string(uuid.NewUUID()), 1637 Labels: map[string]string{"test": "startup"}, 1638 }, 1639 Spec: v1.PodSpec{ 1640 Containers: []v1.Container{ 1641 { 1642 Name: "busybox", 1643 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1644 Command: cmd, 1645 LivenessProbe: livenessProbe, 1646 ReadinessProbe: readinessProbe, 1647 StartupProbe: startupProbe, 1648 }, 1649 }, 1650 }, 1651 } 1652 } 1653 1654 func execHandler(cmd []string) v1.ProbeHandler { 1655 return v1.ProbeHandler{ 1656 Exec: &v1.ExecAction{ 1657 Command: cmd, 1658 }, 1659 } 1660 } 1661 1662 func httpGetHandler(path string, port int) v1.ProbeHandler { 1663 return v1.ProbeHandler{ 1664 HTTPGet: &v1.HTTPGetAction{ 1665 Path: path, 1666 Port: intstr.FromInt32(int32(port)), 1667 }, 1668 } 1669 } 1670 1671 func tcpSocketHandler(port int) v1.ProbeHandler { 1672 return v1.ProbeHandler{ 1673 TCPSocket: &v1.TCPSocketAction{ 1674 Port: intstr.FromInt32(int32(port)), 1675 }, 1676 } 1677 } 1678 1679 type webserverProbeBuilder struct { 1680 failing bool 1681 initialDelay bool 1682 } 1683 1684 func (b webserverProbeBuilder) withFailing() webserverProbeBuilder { 1685 b.failing = true 1686 return b 1687 } 1688 1689 func (b webserverProbeBuilder) withInitialDelay() webserverProbeBuilder { 1690 b.initialDelay = true 1691 return b 1692 } 1693 1694 func (b webserverProbeBuilder) build() *v1.Probe { 1695 probe := &v1.Probe{ 1696 ProbeHandler: httpGetHandler("/", 80), 1697 } 1698 if b.initialDelay { 1699 probe.InitialDelaySeconds = probeTestInitialDelaySeconds 1700 } 1701 if b.failing { 1702 probe.HTTPGet.Port = intstr.FromInt32(81) 1703 } 1704 return probe 1705 } 1706 1707 func RunLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { 1708 gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty()) 1709 containerName := pod.Spec.Containers[0].Name 1710 runLivenessTest(ctx, f, pod, expectNumRestarts, timeout, containerName) 1711 } 1712 1713 func RunSidecarLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { 1714 gomega.Expect(pod.Spec.InitContainers).NotTo(gomega.BeEmpty()) 1715 containerName := pod.Spec.InitContainers[0].Name 1716 runLivenessTest(ctx, f, pod, expectNumRestarts, timeout, containerName) 1717 } 1718 1719 // RunLivenessTest verifies the number of restarts for pod with given expected number of restarts 1720 func runLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration, containerName string) { 1721 podClient := e2epod.NewPodClient(f) 1722 ns := f.Namespace.Name 1723 // At the end of the test, clean up by removing the pod. 1724 ginkgo.DeferCleanup(func(ctx context.Context) error { 1725 ginkgo.By("deleting the pod") 1726 return podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)) 1727 }) 1728 ginkgo.By(fmt.Sprintf("Creating pod %s in namespace %s", pod.Name, ns)) 1729 podClient.Create(ctx, pod) 1730 1731 // To check for the container is ever started, we need to wait for the 1732 // container to be in a non-waiting state. 1733 framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, ns, pod.Name, "container not waiting", timeout, func(pod *v1.Pod) (bool, error) { 1734 for _, c := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) { 1735 if c.Name == containerName { 1736 if c.State.Running != nil || c.State.Terminated != nil { 1737 return true, nil 1738 } 1739 } 1740 } 1741 return false, nil 1742 })) 1743 1744 // Check the pod's current state and verify that restartCount is present. 1745 ginkgo.By("checking the pod's current state and verifying that restartCount is present") 1746 pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{}) 1747 framework.ExpectNoError(err, fmt.Sprintf("getting pod %s in namespace %s", pod.Name, ns)) 1748 initialRestartCount := podutil.GetExistingContainerStatus(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...), containerName).RestartCount 1749 framework.Logf("Initial restart count of pod %s is %d", pod.Name, initialRestartCount) 1750 1751 // Wait for the restart state to be as desired. 1752 // If initialRestartCount is not zero, there is restarting back-off time. 1753 deadline := time.Now().Add(timeout + time.Duration(initialRestartCount*10)*time.Second) 1754 1755 lastRestartCount := initialRestartCount 1756 observedRestarts := int32(0) 1757 for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) { 1758 pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{}) 1759 framework.Logf("Get pod %s in namespace %s", pod.Name, ns) 1760 framework.ExpectNoError(err, fmt.Sprintf("getting pod %s", pod.Name)) 1761 restartCount := podutil.GetExistingContainerStatus(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...), containerName).RestartCount 1762 if restartCount != lastRestartCount { 1763 framework.Logf("Restart count of pod %s/%s is now %d (%v elapsed)", 1764 ns, pod.Name, restartCount, time.Since(start)) 1765 if restartCount < lastRestartCount { 1766 framework.Failf("Restart count should increment monotonically: restart cont of pod %s/%s changed from %d to %d", 1767 ns, pod.Name, lastRestartCount, restartCount) 1768 } 1769 } 1770 observedRestarts = restartCount - initialRestartCount 1771 if expectNumRestarts > 0 && int(observedRestarts) >= expectNumRestarts { 1772 // Stop if we have observed more than expectNumRestarts restarts. 1773 break 1774 } 1775 lastRestartCount = restartCount 1776 } 1777 1778 // If we expected 0 restarts, fail if observed any restart. 1779 // If we expected n restarts (n > 0), fail if we observed < n restarts. 1780 if (expectNumRestarts == 0 && observedRestarts > 0) || (expectNumRestarts > 0 && 1781 int(observedRestarts) < expectNumRestarts) { 1782 framework.Failf("pod %s/%s - expected number of restarts: %d, found restarts: %d. Pod status: %s.", 1783 ns, pod.Name, expectNumRestarts, observedRestarts, &pod.Status) 1784 } 1785 } 1786 1787 func runReadinessFailTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, notReadyUntil time.Duration, waitForNotPending bool) { 1788 podClient := e2epod.NewPodClient(f) 1789 ns := f.Namespace.Name 1790 gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty()) 1791 1792 // At the end of the test, clean up by removing the pod. 1793 ginkgo.DeferCleanup(func(ctx context.Context) error { 1794 ginkgo.By("deleting the pod") 1795 return podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)) 1796 }) 1797 ginkgo.By(fmt.Sprintf("Creating pod %s in namespace %s", pod.Name, ns)) 1798 podClient.Create(ctx, pod) 1799 1800 if waitForNotPending { 1801 // Wait until the pod is not pending. (Here we need to check for something other than 1802 // 'Pending', since when failures occur, we go to 'Terminated' which can cause indefinite blocking.) 1803 framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, f.ClientSet, ns, pod.Name), 1804 fmt.Sprintf("starting pod %s in namespace %s", pod.Name, ns)) 1805 framework.Logf("Started pod %s in namespace %s", pod.Name, ns) 1806 } 1807 1808 // Wait for the not ready state to be true for notReadyUntil duration 1809 deadline := time.Now().Add(notReadyUntil) 1810 for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) { 1811 // poll for Not Ready 1812 if podutil.IsPodReady(pod) { 1813 framework.Failf("pod %s/%s - expected to be not ready", ns, pod.Name) 1814 } 1815 1816 framework.Logf("pod %s/%s is not ready (%v elapsed)", 1817 ns, pod.Name, time.Since(start)) 1818 } 1819 } 1820 1821 func gRPCServerPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string) *v1.Pod { 1822 return &v1.Pod{ 1823 ObjectMeta: metav1.ObjectMeta{Name: "test-grpc-" + string(uuid.NewUUID())}, 1824 Spec: v1.PodSpec{ 1825 Containers: []v1.Container{ 1826 { 1827 Name: containerName, 1828 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1829 Command: []string{ 1830 "/agnhost", 1831 "grpc-health-checking", 1832 }, 1833 Ports: []v1.ContainerPort{{ContainerPort: int32(5000)}, {ContainerPort: int32(8080)}}, 1834 LivenessProbe: livenessProbe, 1835 ReadinessProbe: readinessProbe, 1836 }, 1837 }, 1838 }, 1839 } 1840 } 1841 1842 func testWebServerSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string, port int) *v1.Pod { 1843 return &v1.Pod{ 1844 ObjectMeta: metav1.ObjectMeta{Name: "test-webserver-sidecar-" + string(uuid.NewUUID())}, 1845 Spec: v1.PodSpec{ 1846 InitContainers: []v1.Container{ 1847 { 1848 Name: containerName, 1849 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1850 Args: []string{"test-webserver", "--port", fmt.Sprintf("%d", port)}, 1851 Ports: []v1.ContainerPort{{ContainerPort: int32(port)}}, 1852 LivenessProbe: livenessProbe, 1853 ReadinessProbe: readinessProbe, 1854 RestartPolicy: func() *v1.ContainerRestartPolicy { 1855 restartPolicy := v1.ContainerRestartPolicyAlways 1856 return &restartPolicy 1857 }(), 1858 }, 1859 }, 1860 Containers: []v1.Container{ 1861 { 1862 Name: "main", 1863 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1864 Args: []string{"pause"}, 1865 }, 1866 }, 1867 }, 1868 } 1869 } 1870 1871 func busyBoxSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1872 return &v1.Pod{ 1873 ObjectMeta: metav1.ObjectMeta{ 1874 Name: "busybox-sidecar-" + string(uuid.NewUUID()), 1875 Labels: map[string]string{"test": "liveness"}, 1876 }, 1877 Spec: v1.PodSpec{ 1878 InitContainers: []v1.Container{ 1879 { 1880 Name: "busybox", 1881 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1882 Command: cmd, 1883 LivenessProbe: livenessProbe, 1884 ReadinessProbe: readinessProbe, 1885 RestartPolicy: func() *v1.ContainerRestartPolicy { 1886 restartPolicy := v1.ContainerRestartPolicyAlways 1887 return &restartPolicy 1888 }(), 1889 }, 1890 }, 1891 Containers: []v1.Container{ 1892 { 1893 Name: "main", 1894 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1895 Args: []string{"pause"}, 1896 }, 1897 }, 1898 }, 1899 } 1900 } 1901 1902 func livenessSidecarPodSpec(namespace string, readinessProbe, livenessProbe *v1.Probe) *v1.Pod { 1903 return &v1.Pod{ 1904 ObjectMeta: metav1.ObjectMeta{ 1905 Name: "test-liveness-sidecar-" + string(uuid.NewUUID()), 1906 Labels: map[string]string{"test": "liveness"}, 1907 Namespace: namespace, 1908 }, 1909 Spec: v1.PodSpec{ 1910 InitContainers: []v1.Container{ 1911 { 1912 Name: "sidecar", 1913 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1914 Args: []string{"liveness"}, 1915 LivenessProbe: livenessProbe, 1916 ReadinessProbe: readinessProbe, 1917 RestartPolicy: func() *v1.ContainerRestartPolicy { 1918 restartPolicy := v1.ContainerRestartPolicyAlways 1919 return &restartPolicy 1920 }(), 1921 }, 1922 }, 1923 Containers: []v1.Container{ 1924 { 1925 Name: "main", 1926 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1927 Args: []string{"pause"}, 1928 }, 1929 }, 1930 }, 1931 } 1932 } 1933 1934 func startupSidecarPodSpec(startupProbe, readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1935 return &v1.Pod{ 1936 ObjectMeta: metav1.ObjectMeta{ 1937 Name: "startup-sidecar-" + string(uuid.NewUUID()), 1938 Labels: map[string]string{"test": "startup"}, 1939 }, 1940 Spec: v1.PodSpec{ 1941 InitContainers: []v1.Container{ 1942 { 1943 Name: "sidecar", 1944 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1945 Command: cmd, 1946 LivenessProbe: livenessProbe, 1947 ReadinessProbe: readinessProbe, 1948 StartupProbe: startupProbe, 1949 RestartPolicy: func() *v1.ContainerRestartPolicy { 1950 restartPolicy := v1.ContainerRestartPolicyAlways 1951 return &restartPolicy 1952 }(), 1953 }, 1954 }, 1955 Containers: []v1.Container{ 1956 { 1957 Name: "main", 1958 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1959 Args: []string{"pause"}, 1960 }, 1961 }, 1962 }, 1963 } 1964 } 1965 1966 func gRPCServerSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string) *v1.Pod { 1967 return &v1.Pod{ 1968 ObjectMeta: metav1.ObjectMeta{Name: "test-grpc-sidecar-" + string(uuid.NewUUID())}, 1969 Spec: v1.PodSpec{ 1970 InitContainers: []v1.Container{ 1971 { 1972 Name: containerName, 1973 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1974 Command: []string{ 1975 "/agnhost", 1976 "grpc-health-checking", 1977 }, 1978 Ports: []v1.ContainerPort{{ContainerPort: int32(5000)}, {ContainerPort: int32(8080)}}, 1979 LivenessProbe: livenessProbe, 1980 ReadinessProbe: readinessProbe, 1981 RestartPolicy: func() *v1.ContainerRestartPolicy { 1982 restartPolicy := v1.ContainerRestartPolicyAlways 1983 return &restartPolicy 1984 }(), 1985 }, 1986 }, 1987 Containers: []v1.Container{ 1988 { 1989 Name: "main", 1990 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1991 Args: []string{"pause"}, 1992 }, 1993 }, 1994 }, 1995 } 1996 }