k8s.io/kubernetes@v1.29.3/test/e2e/common/node/container_probe.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package node 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "net/url" 24 "strings" 25 "time" 26 27 v1 "k8s.io/api/core/v1" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/fields" 30 "k8s.io/apimachinery/pkg/runtime" 31 "k8s.io/apimachinery/pkg/util/intstr" 32 "k8s.io/apimachinery/pkg/util/uuid" 33 "k8s.io/apimachinery/pkg/watch" 34 clientset "k8s.io/client-go/kubernetes" 35 "k8s.io/client-go/tools/cache" 36 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 37 "k8s.io/kubernetes/pkg/kubelet/events" 38 "k8s.io/kubernetes/test/e2e/feature" 39 "k8s.io/kubernetes/test/e2e/framework" 40 e2eevents "k8s.io/kubernetes/test/e2e/framework/events" 41 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 42 testutils "k8s.io/kubernetes/test/utils" 43 imageutils "k8s.io/kubernetes/test/utils/image" 44 admissionapi "k8s.io/pod-security-admission/api" 45 46 "github.com/onsi/ginkgo/v2" 47 "github.com/onsi/gomega" 48 ) 49 50 const ( 51 probeTestInitialDelaySeconds = 15 52 53 defaultObservationTimeout = time.Minute * 4 54 ) 55 56 var _ = SIGDescribe("Probing container", func() { 57 f := framework.NewDefaultFramework("container-probe") 58 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 59 var podClient *e2epod.PodClient 60 probe := webserverProbeBuilder{} 61 62 ginkgo.BeforeEach(func() { 
63 podClient = e2epod.NewPodClient(f) 64 }) 65 66 /* 67 Release: v1.9 68 Testname: Pod readiness probe, with initial delay 69 Description: Create a Pod that is configured with a initial delay set on the readiness probe. Check the Pod Start time to compare to the initial delay. The Pod MUST be ready only after the specified initial delay. 70 */ 71 framework.ConformanceIt("with readiness probe should not be ready before initial delay and never restart", f.WithNodeConformance(), func(ctx context.Context) { 72 containerName := "test-webserver" 73 p := podClient.Create(ctx, testWebServerPodSpec(probe.withInitialDelay().build(), nil, containerName, 80)) 74 framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout)) 75 76 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 77 framework.ExpectNoError(err) 78 isReady, err := testutils.PodRunningReady(p) 79 framework.ExpectNoError(err) 80 if !isReady { 81 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 82 } 83 84 // We assume the pod became ready when the container became ready. This 85 // is true for a single container pod. 86 readyTime, err := GetTransitionTimeForReadyCondition(p) 87 framework.ExpectNoError(err) 88 startedTime, err := GetContainerStartedTime(p, containerName) 89 framework.ExpectNoError(err) 90 91 framework.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) 92 initialDelay := probeTestInitialDelaySeconds * time.Second 93 if readyTime.Sub(startedTime) < initialDelay { 94 framework.Failf("Pod became ready before it's %v initial delay", initialDelay) 95 } 96 97 restartCount := getRestartCount(p) 98 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 99 }) 100 101 /* 102 Release: v1.9 103 Testname: Pod readiness probe, failure 104 Description: Create a Pod with a readiness probe that fails consistently. 
When this Pod is created, 105 then the Pod MUST never be ready, never be running and restart count MUST be zero. 106 */ 107 framework.ConformanceIt("with readiness probe that fails should never be ready and never restart", f.WithNodeConformance(), func(ctx context.Context) { 108 p := podClient.Create(ctx, testWebServerPodSpec(probe.withFailing().build(), nil, "test-webserver", 80)) 109 gomega.Consistently(ctx, func() (bool, error) { 110 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 111 if err != nil { 112 return false, err 113 } 114 return podutil.IsPodReady(p), nil 115 }, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(), "pod should not be ready") 116 117 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 118 framework.ExpectNoError(err) 119 120 isReady, _ := testutils.PodRunningReady(p) 121 if isReady { 122 framework.Failf("pod %s/%s should be not ready", f.Namespace.Name, p.Name) 123 } 124 125 restartCount := getRestartCount(p) 126 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 127 }) 128 129 /* 130 Release: v1.9 131 Testname: Pod liveness probe, using local file, restart 132 Description: Create a Pod with liveness probe that uses ExecAction handler to cat /temp/health file. The Container deletes the file /temp/health after 10 second, triggering liveness probe to fail. The Pod MUST now be killed and restarted incrementing restart count to 1. 
133 */ 134 framework.ConformanceIt("should be restarted with a exec \"cat /tmp/health\" liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 135 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 10; rm -rf /tmp/health; sleep 600"} 136 livenessProbe := &v1.Probe{ 137 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 138 InitialDelaySeconds: 15, 139 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 140 FailureThreshold: 1, 141 } 142 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 143 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 144 }) 145 146 /* 147 Release: v1.9 148 Testname: Pod liveness probe, using local file, no restart 149 Description: Pod is created with liveness probe that uses 'exec' command to cat /temp/health file. Liveness probe MUST not fail to check health and the restart count should remain 0. 150 */ 151 framework.ConformanceIt("should *not* be restarted with a exec \"cat /tmp/health\" liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 152 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 600"} 153 livenessProbe := &v1.Probe{ 154 ProbeHandler: execHandler([]string{"cat", "/tmp/health"}), 155 InitialDelaySeconds: 15, 156 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 157 FailureThreshold: 1, 158 } 159 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 160 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 161 }) 162 163 /* 164 Release: v1.9 165 Testname: Pod liveness probe, using http endpoint, restart 166 Description: A Pod is created with liveness probe on http endpoint /healthz. The http handler on the /healthz will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. 
167 */ 168 framework.ConformanceIt("should be restarted with a /healthz http liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 169 livenessProbe := &v1.Probe{ 170 ProbeHandler: httpGetHandler("/healthz", 8080), 171 InitialDelaySeconds: 15, 172 FailureThreshold: 1, 173 } 174 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 175 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 176 }) 177 178 /* 179 Release: v1.18 180 Testname: Pod liveness probe, using tcp socket, no restart 181 Description: A Pod is created with liveness probe on tcp socket 8080. The http handler on port 8080 will return http errors after 10 seconds, but the socket will remain open. Liveness probe MUST not fail to check health and the restart count should remain 0. 182 */ 183 framework.ConformanceIt("should *not* be restarted with a tcp:8080 liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 184 livenessProbe := &v1.Probe{ 185 ProbeHandler: tcpSocketHandler(8080), 186 InitialDelaySeconds: 15, 187 FailureThreshold: 1, 188 } 189 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 190 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 191 }) 192 193 /* 194 Release: v1.9 195 Testname: Pod liveness probe, using http endpoint, multiple restarts (slow) 196 Description: A Pod is created with liveness probe on http endpoint /healthz. The http handler on the /healthz will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. The liveness probe must fail again after restart once the http handler for /healthz enpoind on the Pod returns an http error after 10 seconds from the start. Restart counts MUST increment every time health check fails, measure up to 5 restart. 
197 */ 198 framework.ConformanceIt("should have monotonically increasing restart count", f.WithNodeConformance(), func(ctx context.Context) { 199 livenessProbe := &v1.Probe{ 200 ProbeHandler: httpGetHandler("/healthz", 8080), 201 InitialDelaySeconds: 5, 202 FailureThreshold: 1, 203 } 204 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 205 // ~2 minutes backoff timeouts + 4 minutes defaultObservationTimeout + 2 minutes for each pod restart 206 RunLivenessTest(ctx, f, pod, 5, 2*time.Minute+defaultObservationTimeout+4*2*time.Minute) 207 }) 208 209 /* 210 Release: v1.9 211 Testname: Pod liveness probe, using http endpoint, failure 212 Description: A Pod is created with liveness probe on http endpoint '/'. Liveness probe on this endpoint will not fail. When liveness probe does not fail then the restart count MUST remain zero. 213 */ 214 framework.ConformanceIt("should *not* be restarted with a /healthz http liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 215 livenessProbe := &v1.Probe{ 216 ProbeHandler: httpGetHandler("/", 80), 217 InitialDelaySeconds: 15, 218 TimeoutSeconds: 5, 219 FailureThreshold: 5, // to accommodate nodes which are slow in bringing up containers. 220 } 221 pod := testWebServerPodSpec(nil, livenessProbe, "test-webserver", 80) 222 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 223 }) 224 225 /* 226 Release: v1.9 227 Testname: Pod liveness probe, container exec timeout, restart 228 Description: A Pod is created with liveness probe with a Exec action on the Pod. If the liveness probe call does not return within the timeout specified, liveness probe MUST restart the Pod. 
229 */ 230 f.It("should be restarted with an exec liveness probe with timeout [MinimumKubeletVersion:1.20]", f.WithNodeConformance(), func(ctx context.Context) { 231 cmd := []string{"/bin/sh", "-c", "sleep 600"} 232 livenessProbe := &v1.Probe{ 233 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 234 InitialDelaySeconds: 15, 235 TimeoutSeconds: 1, 236 FailureThreshold: 1, 237 } 238 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 239 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 240 }) 241 242 /* 243 Release: v1.20 244 Testname: Pod readiness probe, container exec timeout, not ready 245 Description: A Pod is created with readiness probe with a Exec action on the Pod. If the readiness probe call does not return within the timeout specified, readiness probe MUST not be Ready. 246 */ 247 f.It("should not be ready with an exec readiness probe timeout [MinimumKubeletVersion:1.20]", f.WithNodeConformance(), func(ctx context.Context) { 248 cmd := []string{"/bin/sh", "-c", "sleep 600"} 249 readinessProbe := &v1.Probe{ 250 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10"}), 251 InitialDelaySeconds: 15, 252 TimeoutSeconds: 1, 253 FailureThreshold: 1, 254 } 255 pod := busyBoxPodSpec(readinessProbe, nil, cmd) 256 runReadinessFailTest(ctx, f, pod, time.Minute, true) 257 }) 258 259 /* 260 Release: v1.21 261 Testname: Pod liveness probe, container exec timeout, restart 262 Description: A Pod is created with liveness probe with a Exec action on the Pod. If the liveness probe call does not return within the timeout specified, liveness probe MUST restart the Pod. When ExecProbeTimeout feature gate is disabled and cluster is using dockershim, the timeout is ignored BUT a failing liveness probe MUST restart the Pod. 
263 */ 264 ginkgo.It("should be restarted with a failing exec liveness probe that took longer than the timeout", func(ctx context.Context) { 265 cmd := []string{"/bin/sh", "-c", "sleep 600"} 266 livenessProbe := &v1.Probe{ 267 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10 & exit 1"}), 268 InitialDelaySeconds: 15, 269 TimeoutSeconds: 1, 270 FailureThreshold: 1, 271 } 272 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 273 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 274 }) 275 276 /* 277 Release: v1.14 278 Testname: Pod http liveness probe, redirected to a local address 279 Description: A Pod is created with liveness probe on http endpoint /redirect?loc=healthz. The http handler on the /redirect will redirect to the /healthz endpoint, which will return a http error after 10 seconds since the Pod is started. This MUST result in liveness check failure. The Pod MUST now be killed and restarted incrementing restart count to 1. 280 */ 281 ginkgo.It("should be restarted with a local redirect http liveness probe", func(ctx context.Context) { 282 livenessProbe := &v1.Probe{ 283 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("/healthz"), 8080), 284 InitialDelaySeconds: 15, 285 FailureThreshold: 1, 286 } 287 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 288 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 289 }) 290 291 /* 292 Release: v1.14 293 Testname: Pod http liveness probe, redirected to a non-local address 294 Description: A Pod is created with liveness probe on http endpoint /redirect with a redirect to http://0.0.0.0/. The http handler on the /redirect should not follow the redirect, but instead treat it as a success and generate an event. 
295 */ 296 ginkgo.It("should *not* be restarted with a non-local redirect http liveness probe", func(ctx context.Context) { 297 livenessProbe := &v1.Probe{ 298 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("http://0.0.0.0/"), 8080), 299 InitialDelaySeconds: 15, 300 FailureThreshold: 1, 301 } 302 pod := livenessPodSpec(f.Namespace.Name, nil, livenessProbe) 303 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 304 // Expect an event of type "ProbeWarning". 305 expectedEvent := fields.Set{ 306 "involvedObject.kind": "Pod", 307 "involvedObject.name": pod.Name, 308 "involvedObject.namespace": f.Namespace.Name, 309 "reason": events.ContainerProbeWarning, 310 }.AsSelector().String() 311 framework.ExpectNoError(e2eevents.WaitTimeoutForEvent( 312 ctx, f.ClientSet, f.Namespace.Name, expectedEvent, "Probe terminated redirects, Response body: <a href=\"http://0.0.0.0/\">Found</a>.", framework.PodEventTimeout)) 313 }) 314 315 /* 316 Release: v1.16 317 Testname: Pod startup probe restart 318 Description: A Pod is created with a failing startup probe. The Pod MUST be killed and restarted incrementing restart count to 1, even if liveness would succeed. 
319 */ 320 ginkgo.It("should be restarted startup probe fails", func(ctx context.Context) { 321 cmd := []string{"/bin/sh", "-c", "sleep 600"} 322 livenessProbe := &v1.Probe{ 323 ProbeHandler: v1.ProbeHandler{ 324 Exec: &v1.ExecAction{ 325 Command: []string{"/bin/true"}, 326 }, 327 }, 328 InitialDelaySeconds: 15, 329 FailureThreshold: 1, 330 } 331 startupProbe := &v1.Probe{ 332 ProbeHandler: v1.ProbeHandler{ 333 Exec: &v1.ExecAction{ 334 Command: []string{"/bin/false"}, 335 }, 336 }, 337 InitialDelaySeconds: 15, 338 FailureThreshold: 3, 339 } 340 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 341 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 342 }) 343 344 /* 345 Release: v1.16 346 Testname: Pod liveness probe delayed (long) by startup probe 347 Description: A Pod is created with failing liveness and startup probes. Liveness probe MUST NOT fail until startup probe expires. 348 */ 349 ginkgo.It("should *not* be restarted by liveness probe because startup probe delays it", func(ctx context.Context) { 350 cmd := []string{"/bin/sh", "-c", "sleep 600"} 351 livenessProbe := &v1.Probe{ 352 ProbeHandler: v1.ProbeHandler{ 353 Exec: &v1.ExecAction{ 354 Command: []string{"/bin/false"}, 355 }, 356 }, 357 InitialDelaySeconds: 15, 358 FailureThreshold: 1, 359 } 360 startupProbe := &v1.Probe{ 361 ProbeHandler: v1.ProbeHandler{ 362 Exec: &v1.ExecAction{ 363 Command: []string{"/bin/false"}, 364 }, 365 }, 366 InitialDelaySeconds: 15, 367 FailureThreshold: 60, 368 } 369 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 370 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 371 }) 372 373 /* 374 Release: v1.16 375 Testname: Pod liveness probe fails after startup success 376 Description: A Pod is created with failing liveness probe and delayed startup probe that uses 'exec' command to cat /temp/health file. The Container is started by creating /tmp/startup after 10 seconds, triggering liveness probe to fail. 
The Pod MUST now be killed and restarted incrementing restart count to 1. 377 */ 378 ginkgo.It("should be restarted by liveness probe after startup probe enables it", func(ctx context.Context) { 379 cmd := []string{"/bin/sh", "-c", "sleep 10; echo ok >/tmp/startup; sleep 600"} 380 livenessProbe := &v1.Probe{ 381 ProbeHandler: v1.ProbeHandler{ 382 Exec: &v1.ExecAction{ 383 Command: []string{"/bin/false"}, 384 }, 385 }, 386 InitialDelaySeconds: 15, 387 FailureThreshold: 1, 388 } 389 startupProbe := &v1.Probe{ 390 ProbeHandler: v1.ProbeHandler{ 391 Exec: &v1.ExecAction{ 392 Command: []string{"cat", "/tmp/startup"}, 393 }, 394 }, 395 InitialDelaySeconds: 15, 396 FailureThreshold: 60, 397 } 398 pod := startupPodSpec(startupProbe, nil, livenessProbe, cmd) 399 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 400 }) 401 402 /* 403 Release: v1.16 404 Testname: Pod readiness probe, delayed by startup probe 405 Description: A Pod is created with startup and readiness probes. The Container is started by creating /tmp/startup after 45 seconds, delaying the ready state by this amount of time. This is similar to the "Pod readiness probe, with initial delay" test. 
406 */ 407 ginkgo.It("should be ready immediately after startupProbe succeeds", func(ctx context.Context) { 408 // Probe workers sleep at Kubelet start for a random time which is at most PeriodSeconds 409 // this test requires both readiness and startup workers running before updating statuses 410 // to avoid flakes, ensure sleep before startup (32s) > readinessProbe.PeriodSeconds 411 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 32; echo ok >/tmp/startup; sleep 600"} 412 readinessProbe := &v1.Probe{ 413 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/health"}), 414 InitialDelaySeconds: 0, 415 PeriodSeconds: 30, 416 } 417 startupProbe := &v1.Probe{ 418 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 419 InitialDelaySeconds: 0, 420 FailureThreshold: 120, 421 PeriodSeconds: 5, 422 } 423 p := podClient.Create(ctx, startupPodSpec(startupProbe, readinessProbe, nil, cmd)) 424 425 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 426 framework.ExpectNoError(err) 427 428 err = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, p.Name, 0, framework.PodStartTimeout) 429 framework.ExpectNoError(err) 430 startedTime := time.Now() 431 432 // We assume the pod became ready when the container became ready. This 433 // is true for a single container pod. 
434 err = e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout) 435 framework.ExpectNoError(err) 436 readyTime := time.Now() 437 438 p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) 439 framework.ExpectNoError(err) 440 441 isReady, err := testutils.PodRunningReady(p) 442 framework.ExpectNoError(err) 443 if !isReady { 444 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 445 } 446 447 readyIn := readyTime.Sub(startedTime) 448 framework.Logf("Container started at %v, pod became ready at %v, %v after startupProbe succeeded", startedTime, readyTime, readyIn) 449 if readyIn < 0 { 450 framework.Failf("Pod became ready before startupProbe succeeded") 451 } 452 if readyIn > 25*time.Second { 453 framework.Failf("Pod became ready in %v, more than 25s after startupProbe succeeded. It means that the delay readiness probes were not initiated immediately after startup finished.", readyIn) 454 } 455 }) 456 457 /* 458 Release: v1.21 459 Testname: Set terminationGracePeriodSeconds for livenessProbe 460 Description: A pod with a long terminationGracePeriod is created with a shorter livenessProbe-level terminationGracePeriodSeconds. We confirm the shorter termination period is used. 
461 */ 462 f.It("should override timeoutGracePeriodSeconds when LivenessProbe field is set", f.WithNodeConformance(), func(ctx context.Context) { 463 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 464 // probe will fail since pod has no http endpoints 465 shortGracePeriod := int64(5) 466 livenessProbe := &v1.Probe{ 467 ProbeHandler: v1.ProbeHandler{ 468 HTTPGet: &v1.HTTPGetAction{ 469 Path: "/healthz", 470 Port: intstr.FromInt32(8080), 471 }, 472 }, 473 InitialDelaySeconds: 10, 474 FailureThreshold: 1, 475 TerminationGracePeriodSeconds: &shortGracePeriod, 476 } 477 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 478 longGracePeriod := int64(500) 479 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 480 481 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 482 // add defaultObservationTimeout(4min) more for kubelet syncing information 483 // to apiserver 484 RunLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 485 }) 486 487 /* 488 Release: v1.21 489 Testname: Set terminationGracePeriodSeconds for startupProbe 490 Description: A pod with a long terminationGracePeriod is created with a shorter startupProbe-level terminationGracePeriodSeconds. We confirm the shorter termination period is used. 
491 */ 492 f.It("should override timeoutGracePeriodSeconds when StartupProbe field is set", f.WithNodeConformance(), func(ctx context.Context) { 493 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 494 // probe will fail since pod has no http endpoints 495 livenessProbe := &v1.Probe{ 496 ProbeHandler: v1.ProbeHandler{ 497 Exec: &v1.ExecAction{ 498 Command: []string{"/bin/true"}, 499 }, 500 }, 501 InitialDelaySeconds: 15, 502 FailureThreshold: 1, 503 } 504 pod := busyBoxPodSpec(nil, livenessProbe, cmd) 505 longGracePeriod := int64(500) 506 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 507 508 shortGracePeriod := int64(5) 509 pod.Spec.Containers[0].StartupProbe = &v1.Probe{ 510 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 511 InitialDelaySeconds: 10, 512 FailureThreshold: 1, 513 TerminationGracePeriodSeconds: &shortGracePeriod, 514 } 515 516 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 517 // add defaultObservationTimeout(4min) more for kubelet syncing information 518 // to apiserver 519 RunLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 520 }) 521 522 /* 523 Release: v1.23 524 Testname: Pod liveness probe, using grpc call, success 525 Description: A Pod is created with liveness probe on grpc service. Liveness probe on this endpoint will not fail. When liveness probe does not fail then the restart count MUST remain zero. 
526 */ 527 framework.ConformanceIt("should *not* be restarted with a GRPC liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 528 livenessProbe := &v1.Probe{ 529 ProbeHandler: v1.ProbeHandler{ 530 GRPC: &v1.GRPCAction{ 531 Port: 5000, 532 Service: nil, 533 }, 534 }, 535 InitialDelaySeconds: probeTestInitialDelaySeconds, 536 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 537 FailureThreshold: 1, 538 } 539 540 pod := gRPCServerPodSpec(nil, livenessProbe, "agnhost") 541 RunLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 542 }) 543 544 /* 545 Release: v1.23 546 Testname: Pod liveness probe, using grpc call, failure 547 Description: A Pod is created with liveness probe on grpc service. Liveness probe on this endpoint should fail because of wrong probe port. 548 When liveness probe does fail then the restart count should +1. 549 */ 550 framework.ConformanceIt("should be restarted with a GRPC liveness probe", f.WithNodeConformance(), func(ctx context.Context) { 551 livenessProbe := &v1.Probe{ 552 ProbeHandler: v1.ProbeHandler{ 553 GRPC: &v1.GRPCAction{ 554 Port: 2333, // this port is wrong 555 }, 556 }, 557 InitialDelaySeconds: probeTestInitialDelaySeconds * 4, 558 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 559 FailureThreshold: 1, 560 } 561 pod := gRPCServerPodSpec(nil, livenessProbe, "agnhost") 562 RunLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 563 }) 564 565 ginkgo.It("should mark readiness on pods to false while pod is in progress of terminating when a pod has a readiness probe", func(ctx context.Context) { 566 podName := "probe-test-" + string(uuid.NewUUID()) 567 podClient := e2epod.NewPodClient(f) 568 terminationGracePeriod := int64(30) 569 script := ` 570 _term() { 571 rm -f /tmp/ready 572 sleep 30 573 exit 0 574 } 575 trap _term SIGTERM 576 577 touch /tmp/ready 578 579 while true; do 580 echo \"hello\" 581 sleep 10 582 done 
583 ` 584 585 // Create Pod 586 podClient.Create(ctx, &v1.Pod{ 587 ObjectMeta: metav1.ObjectMeta{ 588 Name: podName, 589 }, 590 Spec: v1.PodSpec{ 591 Containers: []v1.Container{ 592 { 593 Image: imageutils.GetE2EImage(imageutils.Agnhost), 594 Name: podName, 595 Command: []string{"/bin/bash"}, 596 Args: []string{"-c", script}, 597 ReadinessProbe: &v1.Probe{ 598 ProbeHandler: v1.ProbeHandler{ 599 Exec: &v1.ExecAction{ 600 Command: []string{"cat", "/tmp/ready"}, 601 }, 602 }, 603 FailureThreshold: 1, 604 InitialDelaySeconds: 5, 605 PeriodSeconds: 2, 606 }, 607 }, 608 }, 609 TerminationGracePeriodSeconds: &terminationGracePeriod, 610 }, 611 }) 612 613 // verify pods are running and ready 614 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) 615 framework.ExpectNoError(err) 616 617 // Shutdown pod. Readiness should change to false 618 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 619 framework.ExpectNoError(err) 620 621 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 622 if !podutil.IsPodReady(pod) { 623 return true, nil 624 } 625 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 626 return false, nil 627 }) 628 framework.ExpectNoError(err) 629 }) 630 631 ginkgo.It("should mark readiness on pods to false and disable liveness probes while pod is in progress of terminating", func(ctx context.Context) { 632 podName := "probe-test-" + string(uuid.NewUUID()) 633 podClient := e2epod.NewPodClient(f) 634 terminationGracePeriod := int64(30) 635 script := ` 636 _term() { 637 rm -f /tmp/ready 638 rm -f /tmp/liveness 639 sleep 20 640 exit 0 641 } 642 trap _term SIGTERM 643 644 touch /tmp/ready 645 touch /tmp/liveness 646 647 while true; do 648 echo \"hello\" 649 sleep 10 650 done 651 ` 652 653 // Create Pod 654 podClient.Create(ctx, &v1.Pod{ 655 ObjectMeta: metav1.ObjectMeta{ 656 Name: 
podName, 657 }, 658 Spec: v1.PodSpec{ 659 Containers: []v1.Container{ 660 { 661 Image: imageutils.GetE2EImage(imageutils.Agnhost), 662 Name: podName, 663 Command: []string{"/bin/bash"}, 664 Args: []string{"-c", script}, 665 ReadinessProbe: &v1.Probe{ 666 ProbeHandler: v1.ProbeHandler{ 667 Exec: &v1.ExecAction{ 668 Command: []string{"cat", "/tmp/ready"}, 669 }, 670 }, 671 FailureThreshold: 1, 672 // delay startup to make sure the script script has 673 // time to create the ready+liveness files 674 InitialDelaySeconds: 5, 675 PeriodSeconds: 2, 676 }, 677 LivenessProbe: &v1.Probe{ 678 ProbeHandler: v1.ProbeHandler{ 679 Exec: &v1.ExecAction{ 680 Command: []string{"cat", "/tmp/liveness"}, 681 }, 682 }, 683 FailureThreshold: 1, 684 // delay startup to make sure the script script has 685 // time to create the ready+liveness files 686 InitialDelaySeconds: 5, 687 PeriodSeconds: 1, 688 }, 689 }, 690 }, 691 TerminationGracePeriodSeconds: &terminationGracePeriod, 692 }, 693 }) 694 695 // verify pods are running and ready 696 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) 697 framework.ExpectNoError(err) 698 699 // Shutdown pod. 
Readiness should change to false 700 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 701 framework.ExpectNoError(err) 702 703 // Wait for pod to go unready 704 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 705 if !podutil.IsPodReady(pod) { 706 return true, nil 707 } 708 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 709 return false, nil 710 }) 711 framework.ExpectNoError(err) 712 713 // Verify there are zero liveness failures since they are turned off 714 // during pod termination 715 gomega.Consistently(ctx, func(ctx context.Context) (bool, error) { 716 items, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 717 framework.ExpectNoError(err) 718 for _, event := range items.Items { 719 // Search only for the pod we are interested in 720 if event.InvolvedObject.Name != podName { 721 continue 722 } 723 if strings.Contains(event.Message, "failed liveness probe") { 724 return true, errors.New("should not see liveness probe failures") 725 } 726 } 727 return false, nil 728 }, 1*time.Minute, framework.Poll).ShouldNot(gomega.BeTrue(), "should not see liveness probes") 729 }) 730 }) 731 732 var _ = SIGDescribe("[NodeAlphaFeature:SidecarContainers]", feature.SidecarContainers, "Probing restartable init container", func() { 733 f := framework.NewDefaultFramework("container-probe") 734 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 735 var podClient *e2epod.PodClient 736 probe := webserverProbeBuilder{} 737 738 ginkgo.BeforeEach(func() { 739 podClient = e2epod.NewPodClient(f) 740 }) 741 742 /* 743 Release: v1.28 744 Testname: Pod restartable init container readiness probe, with initial delay 745 Description: Create a Pod that is configured with a initial delay set on 746 the readiness probe. Check the Pod Start time to compare to the initial 747 delay. 
The Pod MUST be ready only after the specified initial delay. 748 */ 749 ginkgo.It("with readiness probe should not be ready before initial delay and never restart", func(ctx context.Context) { 750 containerName := "test-webserver" 751 p := podClient.Create(ctx, testWebServerSidecarPodSpec(probe.withInitialDelay().build(), nil, containerName, 80)) 752 framework.ExpectNoError(e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout)) 753 754 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 755 framework.ExpectNoError(err) 756 isReady, err := testutils.PodRunningReady(p) 757 framework.ExpectNoError(err) 758 if !isReady { 759 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 760 } 761 762 // We assume the pod became ready when the container became ready. This 763 // is true for a single container pod. 764 readyTime, err := GetTransitionTimeForReadyCondition(p) 765 framework.ExpectNoError(err) 766 startedTime, err := GetContainerStartedTime(p, containerName) 767 framework.ExpectNoError(err) 768 769 framework.Logf("Container started at %v, pod became ready at %v", startedTime, readyTime) 770 initialDelay := probeTestInitialDelaySeconds * time.Second 771 if readyTime.Sub(startedTime) < initialDelay { 772 framework.Failf("Pod became ready before it's %v initial delay", initialDelay) 773 } 774 775 restartCount := getRestartCount(p) 776 gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount) 777 }) 778 779 /* 780 Release: v1.28 781 Testname: Pod restartable init container readiness probe, failure 782 Description: Create a Pod with a readiness probe that fails consistently. 783 When this Pod is created, then the Pod MUST never be ready, never be 784 running and restart count MUST be zero. 
	*/
	ginkgo.It("with readiness probe that fails should never be ready and never restart", func(ctx context.Context) {
		p := podClient.Create(ctx, testWebServerSidecarPodSpec(probe.withFailing().build(), nil, "test-webserver", 80))
		gomega.Consistently(ctx, func() (bool, error) {
			p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			return podutil.IsPodReady(p), nil
		}, 1*time.Minute, 1*time.Second).ShouldNot(gomega.BeTrue(), "pod should not be ready")

		p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{})
		framework.ExpectNoError(err)

		isReady, _ := testutils.PodRunningReady(p)
		if isReady {
			framework.Failf("pod %s/%s should be not ready", f.Namespace.Name, p.Name)
		}

		restartCount := getRestartCount(p)
		gomega.Expect(restartCount).To(gomega.Equal(0), "pod should have a restart count of 0 but got %v", restartCount)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, using local file, restart
		Description: Create a Pod with liveness probe that uses ExecAction handler
		to cat /tmp/health file. The Container deletes the file /tmp/health after
		10 seconds, triggering liveness probe to fail. The Pod MUST now be killed
		and restarted incrementing restart count to 1.
	*/
	ginkgo.It("should be restarted with a exec \"cat /tmp/health\" liveness probe", func(ctx context.Context) {
		cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 10; rm -rf /tmp/health; sleep 600"}
		livenessProbe := &v1.Probe{
			ProbeHandler:        execHandler([]string{"cat", "/tmp/health"}),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      5, // default 1s can be pretty aggressive in CI environments with low resources
			FailureThreshold:    1,
		}
		pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd)
		RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, using local file, no restart
		Description: Pod is created with liveness probe that uses 'exec' command
		to cat /tmp/health file. Liveness probe MUST not fail to check health and
		the restart count should remain 0.
	*/
	ginkgo.It("should *not* be restarted with a exec \"cat /tmp/health\" liveness probe", func(ctx context.Context) {
		cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 600"}
		livenessProbe := &v1.Probe{
			ProbeHandler:        execHandler([]string{"cat", "/tmp/health"}),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      5, // default 1s can be pretty aggressive in CI environments with low resources
			FailureThreshold:    1,
		}
		pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd)
		RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, using http endpoint, restart
		Description: A Pod is created with liveness probe on http endpoint
		/healthz. The http handler on the /healthz will return a http error after
		10 seconds since the Pod is started. This MUST result in liveness check
		failure. The Pod MUST now be killed and restarted incrementing restart
		count to 1.
	*/
	ginkgo.It("should be restarted with a /healthz http liveness probe", func(ctx context.Context) {
		livenessProbe := &v1.Probe{
			ProbeHandler:        httpGetHandler("/healthz", 8080),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      5,
			FailureThreshold:    1,
		}
		pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe)
		RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, using tcp socket, no restart
		Description: A Pod is created with liveness probe on tcp socket 8080. The
		http handler on port 8080 will return http errors after 10 seconds, but the
		socket will remain open. Liveness probe MUST not fail to check health and
		the restart count should remain 0.
	*/
	ginkgo.It("should *not* be restarted with a tcp:8080 liveness probe", func(ctx context.Context) {
		livenessProbe := &v1.Probe{
			ProbeHandler:        tcpSocketHandler(8080),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      5,
			FailureThreshold:    1,
		}
		pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe)
		RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, using http endpoint, multiple restarts (slow)
		Description: A Pod is created with liveness probe on http endpoint
		/healthz. The http handler on the /healthz will return a http error after
		10 seconds since the Pod is started. This MUST result in liveness check
		failure. The Pod MUST now be killed and restarted incrementing restart
		count to 1. The liveness probe must fail again after restart once the http
		handler for the /healthz endpoint on the Pod returns an http error after 10
		seconds from the start. Restart counts MUST increment every time health
		check fails, measured up to 5 restarts.
897 */ 898 ginkgo.It("should have monotonically increasing restart count", func(ctx context.Context) { 899 livenessProbe := &v1.Probe{ 900 ProbeHandler: httpGetHandler("/healthz", 8080), 901 InitialDelaySeconds: 5, 902 FailureThreshold: 1, 903 } 904 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 905 // ~2 minutes backoff timeouts + 4 minutes defaultObservationTimeout + 2 minutes for each pod restart 906 RunSidecarLivenessTest(ctx, f, pod, 5, 2*time.Minute+defaultObservationTimeout+4*2*time.Minute) 907 }) 908 909 /* 910 Release: v1.28 911 Testname: Pod restartable init container liveness probe, using http endpoint, failure 912 Description: A Pod is created with liveness probe on http endpoint '/'. 913 Liveness probe on this endpoint will not fail. When liveness probe does not 914 fail then the restart count MUST remain zero. 915 */ 916 ginkgo.It("should *not* be restarted with a /healthz http liveness probe", func(ctx context.Context) { 917 livenessProbe := &v1.Probe{ 918 ProbeHandler: httpGetHandler("/", 80), 919 InitialDelaySeconds: 15, 920 TimeoutSeconds: 5, 921 FailureThreshold: 5, // to accommodate nodes which are slow in bringing up containers. 922 } 923 pod := testWebServerSidecarPodSpec(nil, livenessProbe, "test-webserver", 80) 924 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 925 }) 926 927 /* 928 Release: v1.28 929 Testname: Pod restartable init container liveness probe, container exec timeout, restart 930 Description: A Pod is created with liveness probe with a Exec action on the 931 Pod. If the liveness probe call does not return within the timeout 932 specified, liveness probe MUST restart the Pod. 
	*/
	ginkgo.It("should be restarted with an exec liveness probe with timeout [MinimumKubeletVersion:1.20]", func(ctx context.Context) {
		cmd := []string{"/bin/sh", "-c", "sleep 600"}
		livenessProbe := &v1.Probe{
			ProbeHandler:        execHandler([]string{"/bin/sh", "-c", "sleep 10"}),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      1,
			FailureThreshold:    1,
		}
		pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd)
		RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container readiness probe, container exec timeout, not ready
		Description: A Pod is created with readiness probe with a Exec action on
		the Pod. If the readiness probe call does not return within the timeout
		specified, readiness probe MUST not be Ready.
	*/
	ginkgo.It("should not be ready with an exec readiness probe timeout [MinimumKubeletVersion:1.20]", func(ctx context.Context) {
		cmd := []string{"/bin/sh", "-c", "sleep 600"}
		readinessProbe := &v1.Probe{
			ProbeHandler:        execHandler([]string{"/bin/sh", "-c", "sleep 10"}),
			InitialDelaySeconds: 15,
			TimeoutSeconds:      1,
			FailureThreshold:    1,
		}
		pod := busyBoxSidecarPodSpec(readinessProbe, nil, cmd)
		runReadinessFailTest(ctx, f, pod, time.Minute, false)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container liveness probe, container exec timeout, restart
		Description: A Pod is created with liveness probe with a Exec action on the
		Pod. If the liveness probe call does not return within the timeout
		specified, liveness probe MUST restart the Pod. When ExecProbeTimeout
		feature gate is disabled and cluster is using dockershim, the timeout is
		ignored BUT a failing liveness probe MUST restart the Pod.
973 */ 974 ginkgo.It("should be restarted with a failing exec liveness probe that took longer than the timeout", func(ctx context.Context) { 975 cmd := []string{"/bin/sh", "-c", "sleep 600"} 976 livenessProbe := &v1.Probe{ 977 ProbeHandler: execHandler([]string{"/bin/sh", "-c", "sleep 10 & exit 1"}), 978 InitialDelaySeconds: 15, 979 TimeoutSeconds: 1, 980 FailureThreshold: 1, 981 } 982 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 983 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 984 }) 985 986 /* 987 Release: v1.28 988 Testname: Pod restartable init container http liveness probe, redirected to a local address 989 Description: A Pod is created with liveness probe on http endpoint 990 /redirect?loc=healthz. The http handler on the /redirect will redirect to 991 the /healthz endpoint, which will return a http error after 10 seconds 992 since the Pod is started. This MUST result in liveness check failure. The 993 Pod MUST now be killed and restarted incrementing restart count to 1. 994 */ 995 ginkgo.It("should be restarted with a local redirect http liveness probe", func(ctx context.Context) { 996 livenessProbe := &v1.Probe{ 997 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("/healthz"), 8080), 998 InitialDelaySeconds: 15, 999 FailureThreshold: 1, 1000 } 1001 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 1002 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1003 }) 1004 1005 /* 1006 Release: v1.28 1007 Testname: Pod restartable init container http liveness probe, redirected to a non-local address 1008 Description: A Pod is created with liveness probe on http endpoint 1009 /redirect with a redirect to http://0.0.0.0/. The http handler on the 1010 /redirect should not follow the redirect, but instead treat it as a success 1011 and generate an event. 
1012 */ 1013 ginkgo.It("should *not* be restarted with a non-local redirect http liveness probe", func(ctx context.Context) { 1014 livenessProbe := &v1.Probe{ 1015 ProbeHandler: httpGetHandler("/redirect?loc="+url.QueryEscape("http://0.0.0.0/"), 8080), 1016 InitialDelaySeconds: 15, 1017 FailureThreshold: 1, 1018 } 1019 pod := livenessSidecarPodSpec(f.Namespace.Name, nil, livenessProbe) 1020 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1021 // Expect an event of type "ProbeWarning". 1022 expectedEvent := fields.Set{ 1023 "involvedObject.kind": "Pod", 1024 "involvedObject.name": pod.Name, 1025 "involvedObject.namespace": f.Namespace.Name, 1026 "reason": events.ContainerProbeWarning, 1027 }.AsSelector().String() 1028 framework.ExpectNoError(e2eevents.WaitTimeoutForEvent( 1029 ctx, f.ClientSet, f.Namespace.Name, expectedEvent, "Probe terminated redirects, Response body: <a href=\"http://0.0.0.0/\">Found</a>.", framework.PodEventTimeout)) 1030 }) 1031 1032 /* 1033 Release: v1.28 1034 Testname: Pod restartable init container startup probe restart 1035 Description: A Pod is created with a failing startup probe. The Pod MUST be 1036 killed and restarted incrementing restart count to 1, even if liveness 1037 would succeed. 
1038 */ 1039 ginkgo.It("should be restarted startup probe fails", func(ctx context.Context) { 1040 cmd := []string{"/bin/sh", "-c", "sleep 600"} 1041 livenessProbe := &v1.Probe{ 1042 ProbeHandler: v1.ProbeHandler{ 1043 Exec: &v1.ExecAction{ 1044 Command: []string{"/bin/true"}, 1045 }, 1046 }, 1047 InitialDelaySeconds: 15, 1048 FailureThreshold: 1, 1049 } 1050 startupProbe := &v1.Probe{ 1051 ProbeHandler: v1.ProbeHandler{ 1052 Exec: &v1.ExecAction{ 1053 Command: []string{"/bin/false"}, 1054 }, 1055 }, 1056 InitialDelaySeconds: 15, 1057 FailureThreshold: 3, 1058 } 1059 pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd) 1060 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1061 }) 1062 1063 /* 1064 Release: v1.28 1065 Testname: Pod restartable init container liveness probe delayed (long) by startup probe 1066 Description: A Pod is created with failing liveness and startup probes. 1067 Liveness probe MUST NOT fail until startup probe expires. 1068 */ 1069 ginkgo.It("should *not* be restarted by liveness probe because startup probe delays it", func(ctx context.Context) { 1070 cmd := []string{"/bin/sh", "-c", "sleep 600"} 1071 livenessProbe := &v1.Probe{ 1072 ProbeHandler: v1.ProbeHandler{ 1073 Exec: &v1.ExecAction{ 1074 Command: []string{"/bin/false"}, 1075 }, 1076 }, 1077 InitialDelaySeconds: 15, 1078 FailureThreshold: 1, 1079 } 1080 startupProbe := &v1.Probe{ 1081 ProbeHandler: v1.ProbeHandler{ 1082 Exec: &v1.ExecAction{ 1083 Command: []string{"/bin/false"}, 1084 }, 1085 }, 1086 InitialDelaySeconds: 15, 1087 FailureThreshold: 60, 1088 } 1089 pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd) 1090 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1091 }) 1092 1093 /* 1094 Release: v1.28 1095 Testname: Pod restartable init container liveness probe fails after startup success 1096 Description: A Pod is created with failing liveness probe and delayed 1097 startup probe that uses 'exec' command to cat 
		/tmp/startup file. The startup probe succeeds once the Container creates
		/tmp/startup after 10 seconds, which enables the previously-gated liveness
		probe and triggers it to fail. The Pod MUST now be killed and restarted,
		incrementing restart count to 1.
	*/
	ginkgo.It("should be restarted by liveness probe after startup probe enables it", func(ctx context.Context) {
		cmd := []string{"/bin/sh", "-c", "sleep 10; echo ok >/tmp/startup; sleep 600"}
		livenessProbe := &v1.Probe{
			ProbeHandler: v1.ProbeHandler{
				Exec: &v1.ExecAction{
					Command: []string{"/bin/false"},
				},
			},
			InitialDelaySeconds: 15,
			FailureThreshold:    1,
		}
		startupProbe := &v1.Probe{
			ProbeHandler: v1.ProbeHandler{
				Exec: &v1.ExecAction{
					Command: []string{"cat", "/tmp/startup"},
				},
			},
			InitialDelaySeconds: 15,
			FailureThreshold:    60,
		}
		pod := startupSidecarPodSpec(startupProbe, nil, livenessProbe, cmd)
		RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout)
	})

	/*
		Release: v1.28
		Testname: Pod restartable init container readiness probe, delayed by startup probe
		Description: A Pod is created with startup and readiness probes. The
		Container is started by creating /tmp/startup after 32 seconds, delaying
		the ready state by this amount of time. This is similar to the "Pod
		readiness probe, with initial delay" test.
1133 */ 1134 ginkgo.It("should be ready immediately after startupProbe succeeds", func(ctx context.Context) { 1135 // Probe workers sleep at Kubelet start for a random time which is at most PeriodSeconds 1136 // this test requires both readiness and startup workers running before updating statuses 1137 // to avoid flakes, ensure sleep before startup (32s) > readinessProbe.PeriodSeconds 1138 cmd := []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 32; echo ok >/tmp/startup; sleep 600"} 1139 readinessProbe := &v1.Probe{ 1140 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/health"}), 1141 InitialDelaySeconds: 0, 1142 PeriodSeconds: 30, 1143 } 1144 startupProbe := &v1.Probe{ 1145 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 1146 InitialDelaySeconds: 0, 1147 FailureThreshold: 120, 1148 PeriodSeconds: 5, 1149 } 1150 p := podClient.Create(ctx, startupSidecarPodSpec(startupProbe, readinessProbe, nil, cmd)) 1151 1152 p, err := podClient.Get(ctx, p.Name, metav1.GetOptions{}) 1153 framework.ExpectNoError(err) 1154 1155 err = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, p.Name, 0, framework.PodStartTimeout) 1156 framework.ExpectNoError(err) 1157 startedTime := time.Now() 1158 1159 // We assume the pod became ready when the container became ready. This 1160 // is true for a single container pod. 
1161 err = e2epod.WaitTimeoutForPodReadyInNamespace(ctx, f.ClientSet, p.Name, f.Namespace.Name, framework.PodStartTimeout) 1162 framework.ExpectNoError(err) 1163 readyTime := time.Now() 1164 1165 p, err = podClient.Get(ctx, p.Name, metav1.GetOptions{}) 1166 framework.ExpectNoError(err) 1167 1168 isReady, err := testutils.PodRunningReady(p) 1169 framework.ExpectNoError(err) 1170 if !isReady { 1171 framework.Failf("pod %s/%s should be ready", f.Namespace.Name, p.Name) 1172 } 1173 1174 readyIn := readyTime.Sub(startedTime) 1175 framework.Logf("Container started at %v, pod became ready at %v, %v after startupProbe succeeded", startedTime, readyTime, readyIn) 1176 if readyIn < 0 { 1177 framework.Failf("Pod became ready before startupProbe succeeded") 1178 } 1179 if readyIn > 25*time.Second { 1180 framework.Failf("Pod became ready in %v, more than 25s after startupProbe succeeded. It means that the delay readiness probes were not initiated immediately after startup finished.", readyIn) 1181 } 1182 }) 1183 1184 // TODO: Update tests after implementing termination ordering of restartable 1185 // init containers 1186 /* 1187 Release: v1.28 1188 Testname: Set terminationGracePeriodSeconds for livenessProbe of restartable init container 1189 Description: A pod with a long terminationGracePeriod is created with a 1190 shorter livenessProbe-level terminationGracePeriodSeconds. We confirm the 1191 shorter termination period is used. 
1192 */ 1193 ginkgo.It("should override timeoutGracePeriodSeconds when LivenessProbe field is set", func(ctx context.Context) { 1194 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 1195 // probe will fail since pod has no http endpoints 1196 shortGracePeriod := int64(5) 1197 livenessProbe := &v1.Probe{ 1198 ProbeHandler: v1.ProbeHandler{ 1199 HTTPGet: &v1.HTTPGetAction{ 1200 Path: "/healthz", 1201 Port: intstr.FromInt32(8080), 1202 }, 1203 }, 1204 InitialDelaySeconds: 10, 1205 FailureThreshold: 1, 1206 TerminationGracePeriodSeconds: &shortGracePeriod, 1207 } 1208 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 1209 longGracePeriod := int64(500) 1210 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 1211 1212 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 1213 // add defaultObservationTimeout(4min) more for kubelet syncing information 1214 // to apiserver 1215 RunSidecarLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 1216 }) 1217 1218 /* 1219 Release: v1.28 1220 Testname: Set terminationGracePeriodSeconds for startupProbe of restartable init container 1221 Description: A pod with a long terminationGracePeriod is created with a 1222 shorter startupProbe-level terminationGracePeriodSeconds. We confirm the 1223 shorter termination period is used. 
1224 */ 1225 ginkgo.It("should override timeoutGracePeriodSeconds when StartupProbe field is set", func(ctx context.Context) { 1226 cmd := []string{"/bin/sh", "-c", "sleep 1000"} 1227 // startup probe will fail since pod will sleep for 1000s before becoming ready 1228 livenessProbe := &v1.Probe{ 1229 ProbeHandler: v1.ProbeHandler{ 1230 Exec: &v1.ExecAction{ 1231 Command: []string{"/bin/true"}, 1232 }, 1233 }, 1234 InitialDelaySeconds: 15, 1235 FailureThreshold: 1, 1236 } 1237 pod := busyBoxSidecarPodSpec(nil, livenessProbe, cmd) 1238 longGracePeriod := int64(500) 1239 pod.Spec.TerminationGracePeriodSeconds = &longGracePeriod 1240 1241 shortGracePeriod := int64(5) 1242 pod.Spec.InitContainers[0].StartupProbe = &v1.Probe{ 1243 ProbeHandler: execHandler([]string{"/bin/cat", "/tmp/startup"}), 1244 InitialDelaySeconds: 10, 1245 FailureThreshold: 1, 1246 TerminationGracePeriodSeconds: &shortGracePeriod, 1247 } 1248 1249 // 10s delay + 10s period + 5s grace period = 25s < 30s << pod-level timeout 500 1250 // add defaultObservationTimeout(4min) more for kubelet syncing information 1251 // to apiserver 1252 RunSidecarLivenessTest(ctx, f, pod, 1, time.Second*40+defaultObservationTimeout) 1253 }) 1254 1255 /* 1256 Release: v1.28 1257 Testname: Pod restartable init container liveness probe, using grpc call, success 1258 Description: A Pod is created with liveness probe on grpc service. Liveness 1259 probe on this endpoint will not fail. When liveness probe does not fail 1260 then the restart count MUST remain zero. 
1261 */ 1262 ginkgo.It("should *not* be restarted with a GRPC liveness probe", func(ctx context.Context) { 1263 livenessProbe := &v1.Probe{ 1264 ProbeHandler: v1.ProbeHandler{ 1265 GRPC: &v1.GRPCAction{ 1266 Port: 5000, 1267 Service: nil, 1268 }, 1269 }, 1270 InitialDelaySeconds: probeTestInitialDelaySeconds, 1271 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 1272 FailureThreshold: 1, 1273 } 1274 1275 pod := gRPCServerSidecarPodSpec(nil, livenessProbe, "agnhost") 1276 RunSidecarLivenessTest(ctx, f, pod, 0, defaultObservationTimeout) 1277 }) 1278 1279 /* 1280 Release: v1.28 1281 Testname: Pod restartable init container liveness probe, using grpc call, failure 1282 Description: A Pod is created with liveness probe on grpc service. 1283 Liveness probe on this endpoint should fail because of wrong probe port. 1284 When liveness probe does fail then the restart count should +1. 1285 */ 1286 ginkgo.It("should be restarted with a GRPC liveness probe", func(ctx context.Context) { 1287 livenessProbe := &v1.Probe{ 1288 ProbeHandler: v1.ProbeHandler{ 1289 GRPC: &v1.GRPCAction{ 1290 Port: 2333, // this port is wrong 1291 }, 1292 }, 1293 InitialDelaySeconds: probeTestInitialDelaySeconds * 4, 1294 TimeoutSeconds: 5, // default 1s can be pretty aggressive in CI environments with low resources 1295 FailureThreshold: 1, 1296 } 1297 pod := gRPCServerSidecarPodSpec(nil, livenessProbe, "agnhost") 1298 RunSidecarLivenessTest(ctx, f, pod, 1, defaultObservationTimeout) 1299 }) 1300 1301 ginkgo.It("should mark readiness on pods to false while pod is in progress of terminating when a pod has a readiness probe", func(ctx context.Context) { 1302 podName := "probe-test-" + string(uuid.NewUUID()) 1303 podClient := e2epod.NewPodClient(f) 1304 terminationGracePeriod := int64(30) 1305 script := ` 1306 _term() { 1307 rm -f /tmp/ready 1308 sleep 30 1309 exit 0 1310 } 1311 trap _term SIGTERM 1312 1313 touch /tmp/ready 1314 1315 while true; do 1316 
echo \"hello\" 1317 sleep 10 1318 done 1319 ` 1320 1321 // Create Pod 1322 podClient.Create(ctx, &v1.Pod{ 1323 ObjectMeta: metav1.ObjectMeta{ 1324 Name: podName, 1325 }, 1326 Spec: v1.PodSpec{ 1327 InitContainers: []v1.Container{ 1328 { 1329 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1330 Name: podName, 1331 Command: []string{"/bin/bash"}, 1332 Args: []string{"-c", script}, 1333 ReadinessProbe: &v1.Probe{ 1334 ProbeHandler: v1.ProbeHandler{ 1335 Exec: &v1.ExecAction{ 1336 Command: []string{"cat", "/tmp/ready"}, 1337 }, 1338 }, 1339 FailureThreshold: 1, 1340 InitialDelaySeconds: 5, 1341 PeriodSeconds: 2, 1342 }, 1343 RestartPolicy: func() *v1.ContainerRestartPolicy { 1344 restartPolicy := v1.ContainerRestartPolicyAlways 1345 return &restartPolicy 1346 }(), 1347 }, 1348 }, 1349 Containers: []v1.Container{ 1350 { 1351 Name: "main", 1352 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1353 Args: []string{"pause"}, 1354 }, 1355 }, 1356 TerminationGracePeriodSeconds: &terminationGracePeriod, 1357 }, 1358 }) 1359 1360 // verify pods are running and ready 1361 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) 1362 framework.ExpectNoError(err) 1363 1364 // Shutdown pod. 
Readiness should change to false 1365 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 1366 framework.ExpectNoError(err) 1367 1368 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 1369 if !podutil.IsPodReady(pod) { 1370 return true, nil 1371 } 1372 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 1373 return false, nil 1374 }) 1375 framework.ExpectNoError(err) 1376 }) 1377 1378 ginkgo.It("should mark readiness on pods to false and disable liveness probes while pod is in progress of terminating", func(ctx context.Context) { 1379 podName := "probe-test-" + string(uuid.NewUUID()) 1380 podClient := e2epod.NewPodClient(f) 1381 terminationGracePeriod := int64(30) 1382 script := ` 1383 _term() { 1384 rm -f /tmp/ready 1385 rm -f /tmp/liveness 1386 sleep 20 1387 exit 0 1388 } 1389 trap _term SIGTERM 1390 1391 touch /tmp/ready 1392 touch /tmp/liveness 1393 1394 while true; do 1395 echo \"hello\" 1396 sleep 10 1397 done 1398 ` 1399 1400 // Create Pod 1401 podClient.Create(ctx, &v1.Pod{ 1402 ObjectMeta: metav1.ObjectMeta{ 1403 Name: podName, 1404 }, 1405 Spec: v1.PodSpec{ 1406 InitContainers: []v1.Container{ 1407 { 1408 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1409 Name: podName, 1410 Command: []string{"/bin/bash"}, 1411 Args: []string{"-c", script}, 1412 ReadinessProbe: &v1.Probe{ 1413 ProbeHandler: v1.ProbeHandler{ 1414 Exec: &v1.ExecAction{ 1415 Command: []string{"cat", "/tmp/ready"}, 1416 }, 1417 }, 1418 FailureThreshold: 1, 1419 // delay startup to make sure the script script has 1420 // time to create the ready+liveness files 1421 InitialDelaySeconds: 5, 1422 PeriodSeconds: 2, 1423 }, 1424 LivenessProbe: &v1.Probe{ 1425 ProbeHandler: v1.ProbeHandler{ 1426 Exec: &v1.ExecAction{ 1427 Command: []string{"cat", "/tmp/liveness"}, 1428 }, 1429 }, 1430 FailureThreshold: 1, 1431 // delay startup to make sure the script script 
has 1432 // time to create the ready+liveness files 1433 InitialDelaySeconds: 5, 1434 PeriodSeconds: 1, 1435 }, 1436 RestartPolicy: func() *v1.ContainerRestartPolicy { 1437 restartPolicy := v1.ContainerRestartPolicyAlways 1438 return &restartPolicy 1439 }(), 1440 }, 1441 }, 1442 Containers: []v1.Container{ 1443 { 1444 Name: "main", 1445 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1446 Args: []string{"pause"}, 1447 }, 1448 }, 1449 TerminationGracePeriodSeconds: &terminationGracePeriod, 1450 }, 1451 }) 1452 1453 // verify pods are running and ready 1454 err := e2epod.WaitForPodsRunningReady(ctx, f.ClientSet, f.Namespace.Name, 1, 0, f.Timeouts.PodStart) 1455 framework.ExpectNoError(err) 1456 1457 // Shutdown pod. Readiness should change to false 1458 err = podClient.Delete(ctx, podName, metav1.DeleteOptions{}) 1459 framework.ExpectNoError(err) 1460 1461 // Wait for pod to go unready 1462 err = waitForPodStatusByInformer(ctx, f.ClientSet, f.Namespace.Name, podName, f.Timeouts.PodDelete, func(pod *v1.Pod) (bool, error) { 1463 if !podutil.IsPodReady(pod) { 1464 return true, nil 1465 } 1466 framework.Logf("pod %s/%s is still ready, waiting until is not ready", pod.Namespace, pod.Name) 1467 return false, nil 1468 }) 1469 framework.ExpectNoError(err) 1470 1471 // Verify there are zero liveness failures since they are turned off 1472 // during pod termination 1473 gomega.Consistently(ctx, func(ctx context.Context) (bool, error) { 1474 items, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 1475 framework.ExpectNoError(err) 1476 for _, event := range items.Items { 1477 // Search only for the pod we are interested in 1478 if event.InvolvedObject.Name != podName { 1479 continue 1480 } 1481 if strings.Contains(event.Message, "failed liveness probe") { 1482 return true, errors.New("should not see liveness probe failures") 1483 } 1484 } 1485 return false, nil 1486 }, 1*time.Minute, framework.Poll).ShouldNot(gomega.BeTrue(), "should not 
see liveness probes")
	})
})

// waitForPodStatusByInformer waits until condition reports true for the named
// pod, using a field-selector-scoped informer to observe status changes. It
// returns nil once the condition is met, or an error when ctx is done or the
// timeout elapses first.
func waitForPodStatusByInformer(ctx context.Context, c clientset.Interface, podNamespace, podName string, timeout time.Duration, condition func(pod *v1.Pod) (bool, error)) error {
	// TODO (pohly): rewrite with gomega.Eventually to get intermediate progress reports.
	stopCh := make(chan struct{})
	// Stop the informer on every return path so its goroutine does not leak.
	defer close(stopCh)
	// Buffered channel with a non-blocking send: the informer callback fires
	// on every add/update/delete event, so the condition can match more than
	// once. The previous implementation close()d the stop channel from the
	// callback, which panics with "close of closed channel" on a second
	// match (and could double-close on the timeout/ctx branches).
	matched := make(chan struct{}, 1)
	checkPodStatusFunc := func(pod *v1.Pod) {
		if ok, _ := condition(pod); ok {
			select {
			case matched <- struct{}{}:
			default:
			}
		}
	}
	controller := newInformerWatchPod(ctx, c, podNamespace, podName, checkPodStatusFunc)
	go controller.Run(stopCh)
	select {
	case <-matched:
		return nil
	case <-ctx.Done():
		return fmt.Errorf("context done while waiting for pod %s/%s status: %w", podNamespace, podName, ctx.Err())
	case <-time.After(timeout):
		return fmt.Errorf("timed out waiting for pod %s/%s status", podNamespace, podName)
	}
}

// newInformerWatchPod creates an informer that watches exactly one pod,
// selected by metadata.name in the given namespace, and forwards every
// add/update/delete event to checkPodStatusFunc.
func newInformerWatchPod(ctx context.Context, c clientset.Interface, podNamespace, podName string, checkPodStatusFunc func(p *v1.Pod)) cache.Controller {
	_, controller := cache.NewInformer(
		&cache.ListWatch{
			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
				options.FieldSelector = fields.SelectorFromSet(fields.Set{"metadata.name": podName}).String()
				obj, err := c.CoreV1().Pods(podNamespace).List(ctx, options)
				return runtime.Object(obj), err
			},
			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
				options.FieldSelector = fields.SelectorFromSet(fields.Set{"metadata.name": podName}).String()
				return c.CoreV1().Pods(podNamespace).Watch(ctx, options)
			},
		},
		&v1.Pod{},
		0, // no periodic resync; rely on watch events only
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				p, ok := obj.(*v1.Pod)
				if ok {
					checkPodStatusFunc(p)
				}
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				p, ok := newObj.(*v1.Pod)
				if ok {
					checkPodStatusFunc(p)
				}
			},
			DeleteFunc: func(obj interface{}) {
				p, ok := obj.(*v1.Pod)
				if ok {
					checkPodStatusFunc(p)
				}
			},
		},
	)
	return controller
}

// GetContainerStartedTime returns the time when the given container started and error if any.
// Both init and regular container statuses are searched.
func GetContainerStartedTime(p *v1.Pod, containerName string) (time.Time, error) {
	for _, status := range append(p.Status.InitContainerStatuses, p.Status.ContainerStatuses...) {
		if status.Name != containerName {
			continue
		}
		if status.State.Running == nil {
			return time.Time{}, fmt.Errorf("container is not running")
		}
		return status.State.Running.StartedAt.Time, nil
	}
	return time.Time{}, fmt.Errorf("cannot find container named %q", containerName)
}

// GetTransitionTimeForReadyCondition returns the time when the given pod became ready and error if any.
func GetTransitionTimeForReadyCondition(p *v1.Pod) (time.Time, error) {
	for _, cond := range p.Status.Conditions {
		if cond.Type == v1.PodReady {
			return cond.LastTransitionTime.Time, nil
		}
	}
	return time.Time{}, fmt.Errorf("no ready condition can be found for pod")
}

// getRestartCount sums restart counts across all init and regular containers.
func getRestartCount(p *v1.Pod) int {
	count := 0
	for _, containerStatus := range append(p.Status.InitContainerStatuses, p.Status.ContainerStatuses...)
{
		count += int(containerStatus.RestartCount)
	}
	return count
}

// testWebServerPodSpec returns a pod running the agnhost test-webserver on the
// given port, with the supplied readiness/liveness probes attached to its only
// container.
func testWebServerPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string, port int) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "test-webserver-" + string(uuid.NewUUID())},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:           containerName,
					Image:          imageutils.GetE2EImage(imageutils.Agnhost),
					Args:           []string{"test-webserver"},
					Ports:          []v1.ContainerPort{{ContainerPort: int32(port)}},
					LivenessProbe:  livenessProbe,
					ReadinessProbe: readinessProbe,
				},
			},
		},
	}
}

// busyBoxPodSpec returns a busybox pod that runs cmd, with the supplied
// readiness/liveness probes attached to its only container.
func busyBoxPodSpec(readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "busybox-" + string(uuid.NewUUID()),
			Labels: map[string]string{"test": "liveness"},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:           "busybox",
					Image:          imageutils.GetE2EImage(imageutils.BusyBox),
					Command:        cmd,
					LivenessProbe:  livenessProbe,
					ReadinessProbe: readinessProbe,
				},
			},
		},
	}
}

// livenessPodSpec returns an agnhost "liveness" pod with the supplied probes
// attached to its only container.
func livenessPodSpec(namespace string, readinessProbe, livenessProbe *v1.Probe) *v1.Pod {
	pod := e2epod.NewAgnhostPod(namespace, "liveness-"+string(uuid.NewUUID()), nil, nil, nil, "liveness")
	pod.ObjectMeta.Labels = map[string]string{"test": "liveness"}
	pod.Spec.Containers[0].LivenessProbe = livenessProbe
	pod.Spec.Containers[0].ReadinessProbe = readinessProbe
	return pod
}

// startupPodSpec returns a busybox pod that runs cmd, with startup, readiness
// and liveness probes attached to its only container.
func startupPodSpec(startupProbe, readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "startup-" + string(uuid.NewUUID()),
			Labels: map[string]string{"test": "startup"},
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "busybox",
					Image:
imageutils.GetE2EImage(imageutils.BusyBox), 1643 Command: cmd, 1644 LivenessProbe: livenessProbe, 1645 ReadinessProbe: readinessProbe, 1646 StartupProbe: startupProbe, 1647 }, 1648 }, 1649 }, 1650 } 1651 } 1652 1653 func execHandler(cmd []string) v1.ProbeHandler { 1654 return v1.ProbeHandler{ 1655 Exec: &v1.ExecAction{ 1656 Command: cmd, 1657 }, 1658 } 1659 } 1660 1661 func httpGetHandler(path string, port int) v1.ProbeHandler { 1662 return v1.ProbeHandler{ 1663 HTTPGet: &v1.HTTPGetAction{ 1664 Path: path, 1665 Port: intstr.FromInt32(int32(port)), 1666 }, 1667 } 1668 } 1669 1670 func tcpSocketHandler(port int) v1.ProbeHandler { 1671 return v1.ProbeHandler{ 1672 TCPSocket: &v1.TCPSocketAction{ 1673 Port: intstr.FromInt32(int32(port)), 1674 }, 1675 } 1676 } 1677 1678 type webserverProbeBuilder struct { 1679 failing bool 1680 initialDelay bool 1681 } 1682 1683 func (b webserverProbeBuilder) withFailing() webserverProbeBuilder { 1684 b.failing = true 1685 return b 1686 } 1687 1688 func (b webserverProbeBuilder) withInitialDelay() webserverProbeBuilder { 1689 b.initialDelay = true 1690 return b 1691 } 1692 1693 func (b webserverProbeBuilder) build() *v1.Probe { 1694 probe := &v1.Probe{ 1695 ProbeHandler: httpGetHandler("/", 80), 1696 } 1697 if b.initialDelay { 1698 probe.InitialDelaySeconds = probeTestInitialDelaySeconds 1699 } 1700 if b.failing { 1701 probe.HTTPGet.Port = intstr.FromInt32(81) 1702 } 1703 return probe 1704 } 1705 1706 func RunLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { 1707 gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty()) 1708 containerName := pod.Spec.Containers[0].Name 1709 runLivenessTest(ctx, f, pod, expectNumRestarts, timeout, containerName) 1710 } 1711 1712 func RunSidecarLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration) { 1713 gomega.Expect(pod.Spec.InitContainers).NotTo(gomega.BeEmpty()) 1714 
containerName := pod.Spec.InitContainers[0].Name 1715 runLivenessTest(ctx, f, pod, expectNumRestarts, timeout, containerName) 1716 } 1717 1718 // RunLivenessTest verifies the number of restarts for pod with given expected number of restarts 1719 func runLivenessTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, expectNumRestarts int, timeout time.Duration, containerName string) { 1720 podClient := e2epod.NewPodClient(f) 1721 ns := f.Namespace.Name 1722 // At the end of the test, clean up by removing the pod. 1723 ginkgo.DeferCleanup(func(ctx context.Context) error { 1724 ginkgo.By("deleting the pod") 1725 return podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)) 1726 }) 1727 ginkgo.By(fmt.Sprintf("Creating pod %s in namespace %s", pod.Name, ns)) 1728 podClient.Create(ctx, pod) 1729 1730 // To check for the container is ever started, we need to wait for the 1731 // container to be in a non-waiting state. 1732 framework.ExpectNoError(e2epod.WaitForPodCondition(ctx, f.ClientSet, ns, pod.Name, "container not waiting", timeout, func(pod *v1.Pod) (bool, error) { 1733 for _, c := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) { 1734 if c.Name == containerName { 1735 if c.State.Running != nil || c.State.Terminated != nil { 1736 return true, nil 1737 } 1738 } 1739 } 1740 return false, nil 1741 })) 1742 1743 // Check the pod's current state and verify that restartCount is present. 
1744 ginkgo.By("checking the pod's current state and verifying that restartCount is present") 1745 pod, err := podClient.Get(ctx, pod.Name, metav1.GetOptions{}) 1746 framework.ExpectNoError(err, fmt.Sprintf("getting pod %s in namespace %s", pod.Name, ns)) 1747 initialRestartCount := podutil.GetExistingContainerStatus(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...), containerName).RestartCount 1748 framework.Logf("Initial restart count of pod %s is %d", pod.Name, initialRestartCount) 1749 1750 // Wait for the restart state to be as desired. 1751 // If initialRestartCount is not zero, there is restarting back-off time. 1752 deadline := time.Now().Add(timeout + time.Duration(initialRestartCount*10)*time.Second) 1753 1754 lastRestartCount := initialRestartCount 1755 observedRestarts := int32(0) 1756 for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) { 1757 pod, err = podClient.Get(ctx, pod.Name, metav1.GetOptions{}) 1758 framework.Logf("Get pod %s in namespace %s", pod.Name, ns) 1759 framework.ExpectNoError(err, fmt.Sprintf("getting pod %s", pod.Name)) 1760 restartCount := podutil.GetExistingContainerStatus(append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...), containerName).RestartCount 1761 if restartCount != lastRestartCount { 1762 framework.Logf("Restart count of pod %s/%s is now %d (%v elapsed)", 1763 ns, pod.Name, restartCount, time.Since(start)) 1764 if restartCount < lastRestartCount { 1765 framework.Failf("Restart count should increment monotonically: restart cont of pod %s/%s changed from %d to %d", 1766 ns, pod.Name, lastRestartCount, restartCount) 1767 } 1768 } 1769 observedRestarts = restartCount - initialRestartCount 1770 if expectNumRestarts > 0 && int(observedRestarts) >= expectNumRestarts { 1771 // Stop if we have observed more than expectNumRestarts restarts. 
1772 break 1773 } 1774 lastRestartCount = restartCount 1775 } 1776 1777 // If we expected 0 restarts, fail if observed any restart. 1778 // If we expected n restarts (n > 0), fail if we observed < n restarts. 1779 if (expectNumRestarts == 0 && observedRestarts > 0) || (expectNumRestarts > 0 && 1780 int(observedRestarts) < expectNumRestarts) { 1781 framework.Failf("pod %s/%s - expected number of restarts: %d, found restarts: %d. Pod status: %s.", 1782 ns, pod.Name, expectNumRestarts, observedRestarts, &pod.Status) 1783 } 1784 } 1785 1786 func runReadinessFailTest(ctx context.Context, f *framework.Framework, pod *v1.Pod, notReadyUntil time.Duration, waitForNotPending bool) { 1787 podClient := e2epod.NewPodClient(f) 1788 ns := f.Namespace.Name 1789 gomega.Expect(pod.Spec.Containers).NotTo(gomega.BeEmpty()) 1790 1791 // At the end of the test, clean up by removing the pod. 1792 ginkgo.DeferCleanup(func(ctx context.Context) error { 1793 ginkgo.By("deleting the pod") 1794 return podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)) 1795 }) 1796 ginkgo.By(fmt.Sprintf("Creating pod %s in namespace %s", pod.Name, ns)) 1797 podClient.Create(ctx, pod) 1798 1799 if waitForNotPending { 1800 // Wait until the pod is not pending. (Here we need to check for something other than 1801 // 'Pending', since when failures occur, we go to 'Terminated' which can cause indefinite blocking.) 
1802 framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, f.ClientSet, ns, pod.Name), 1803 fmt.Sprintf("starting pod %s in namespace %s", pod.Name, ns)) 1804 framework.Logf("Started pod %s in namespace %s", pod.Name, ns) 1805 } 1806 1807 // Wait for the not ready state to be true for notReadyUntil duration 1808 deadline := time.Now().Add(notReadyUntil) 1809 for start := time.Now(); time.Now().Before(deadline); time.Sleep(2 * time.Second) { 1810 // poll for Not Ready 1811 if podutil.IsPodReady(pod) { 1812 framework.Failf("pod %s/%s - expected to be not ready", ns, pod.Name) 1813 } 1814 1815 framework.Logf("pod %s/%s is not ready (%v elapsed)", 1816 ns, pod.Name, time.Since(start)) 1817 } 1818 } 1819 1820 func gRPCServerPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string) *v1.Pod { 1821 return &v1.Pod{ 1822 ObjectMeta: metav1.ObjectMeta{Name: "test-grpc-" + string(uuid.NewUUID())}, 1823 Spec: v1.PodSpec{ 1824 Containers: []v1.Container{ 1825 { 1826 Name: containerName, 1827 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1828 Command: []string{ 1829 "/agnhost", 1830 "grpc-health-checking", 1831 }, 1832 Ports: []v1.ContainerPort{{ContainerPort: int32(5000)}, {ContainerPort: int32(8080)}}, 1833 LivenessProbe: livenessProbe, 1834 ReadinessProbe: readinessProbe, 1835 }, 1836 }, 1837 }, 1838 } 1839 } 1840 1841 func testWebServerSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string, port int) *v1.Pod { 1842 return &v1.Pod{ 1843 ObjectMeta: metav1.ObjectMeta{Name: "test-webserver-sidecar-" + string(uuid.NewUUID())}, 1844 Spec: v1.PodSpec{ 1845 InitContainers: []v1.Container{ 1846 { 1847 Name: containerName, 1848 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1849 Args: []string{"test-webserver", "--port", fmt.Sprintf("%d", port)}, 1850 Ports: []v1.ContainerPort{{ContainerPort: int32(port)}}, 1851 LivenessProbe: livenessProbe, 1852 ReadinessProbe: readinessProbe, 1853 RestartPolicy: func() *v1.ContainerRestartPolicy { 1854 
restartPolicy := v1.ContainerRestartPolicyAlways 1855 return &restartPolicy 1856 }(), 1857 }, 1858 }, 1859 Containers: []v1.Container{ 1860 { 1861 Name: "main", 1862 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1863 Args: []string{"pause"}, 1864 }, 1865 }, 1866 }, 1867 } 1868 } 1869 1870 func busyBoxSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1871 return &v1.Pod{ 1872 ObjectMeta: metav1.ObjectMeta{ 1873 Name: "busybox-sidecar-" + string(uuid.NewUUID()), 1874 Labels: map[string]string{"test": "liveness"}, 1875 }, 1876 Spec: v1.PodSpec{ 1877 InitContainers: []v1.Container{ 1878 { 1879 Name: "busybox", 1880 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1881 Command: cmd, 1882 LivenessProbe: livenessProbe, 1883 ReadinessProbe: readinessProbe, 1884 RestartPolicy: func() *v1.ContainerRestartPolicy { 1885 restartPolicy := v1.ContainerRestartPolicyAlways 1886 return &restartPolicy 1887 }(), 1888 }, 1889 }, 1890 Containers: []v1.Container{ 1891 { 1892 Name: "main", 1893 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1894 Args: []string{"pause"}, 1895 }, 1896 }, 1897 }, 1898 } 1899 } 1900 1901 func livenessSidecarPodSpec(namespace string, readinessProbe, livenessProbe *v1.Probe) *v1.Pod { 1902 return &v1.Pod{ 1903 ObjectMeta: metav1.ObjectMeta{ 1904 Name: "test-liveness-sidecar-" + string(uuid.NewUUID()), 1905 Labels: map[string]string{"test": "liveness"}, 1906 Namespace: namespace, 1907 }, 1908 Spec: v1.PodSpec{ 1909 InitContainers: []v1.Container{ 1910 { 1911 Name: "sidecar", 1912 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1913 Args: []string{"liveness"}, 1914 LivenessProbe: livenessProbe, 1915 ReadinessProbe: readinessProbe, 1916 RestartPolicy: func() *v1.ContainerRestartPolicy { 1917 restartPolicy := v1.ContainerRestartPolicyAlways 1918 return &restartPolicy 1919 }(), 1920 }, 1921 }, 1922 Containers: []v1.Container{ 1923 { 1924 Name: "main", 1925 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1926 Args: 
[]string{"pause"}, 1927 }, 1928 }, 1929 }, 1930 } 1931 } 1932 1933 func startupSidecarPodSpec(startupProbe, readinessProbe, livenessProbe *v1.Probe, cmd []string) *v1.Pod { 1934 return &v1.Pod{ 1935 ObjectMeta: metav1.ObjectMeta{ 1936 Name: "startup-sidecar-" + string(uuid.NewUUID()), 1937 Labels: map[string]string{"test": "startup"}, 1938 }, 1939 Spec: v1.PodSpec{ 1940 InitContainers: []v1.Container{ 1941 { 1942 Name: "sidecar", 1943 Image: imageutils.GetE2EImage(imageutils.BusyBox), 1944 Command: cmd, 1945 LivenessProbe: livenessProbe, 1946 ReadinessProbe: readinessProbe, 1947 StartupProbe: startupProbe, 1948 RestartPolicy: func() *v1.ContainerRestartPolicy { 1949 restartPolicy := v1.ContainerRestartPolicyAlways 1950 return &restartPolicy 1951 }(), 1952 }, 1953 }, 1954 Containers: []v1.Container{ 1955 { 1956 Name: "main", 1957 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1958 Args: []string{"pause"}, 1959 }, 1960 }, 1961 }, 1962 } 1963 } 1964 1965 func gRPCServerSidecarPodSpec(readinessProbe, livenessProbe *v1.Probe, containerName string) *v1.Pod { 1966 return &v1.Pod{ 1967 ObjectMeta: metav1.ObjectMeta{Name: "test-grpc-sidecar-" + string(uuid.NewUUID())}, 1968 Spec: v1.PodSpec{ 1969 InitContainers: []v1.Container{ 1970 { 1971 Name: containerName, 1972 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1973 Command: []string{ 1974 "/agnhost", 1975 "grpc-health-checking", 1976 }, 1977 Ports: []v1.ContainerPort{{ContainerPort: int32(5000)}, {ContainerPort: int32(8080)}}, 1978 LivenessProbe: livenessProbe, 1979 ReadinessProbe: readinessProbe, 1980 RestartPolicy: func() *v1.ContainerRestartPolicy { 1981 restartPolicy := v1.ContainerRestartPolicyAlways 1982 return &restartPolicy 1983 }(), 1984 }, 1985 }, 1986 Containers: []v1.Container{ 1987 { 1988 Name: "main", 1989 Image: imageutils.GetE2EImage(imageutils.Agnhost), 1990 Args: []string{"pause"}, 1991 }, 1992 }, 1993 }, 1994 } 1995 }