k8s.io/kubernetes@v1.29.3/test/e2e_node/mirror_pod_grace_period_test.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package e2enode 18 19 import ( 20 "context" 21 "fmt" 22 "os" 23 "strings" 24 "time" 25 26 "github.com/onsi/ginkgo/v2" 27 "github.com/onsi/gomega" 28 "github.com/onsi/gomega/gstruct" 29 "github.com/prometheus/common/model" 30 v1 "k8s.io/api/core/v1" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/apimachinery/pkg/util/uuid" 34 clientset "k8s.io/client-go/kubernetes" 35 "k8s.io/kubernetes/test/e2e/framework" 36 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 37 imageutils "k8s.io/kubernetes/test/utils/image" 38 admissionapi "k8s.io/pod-security-admission/api" 39 ) 40 41 var _ = SIGDescribe("MirrorPodWithGracePeriod", func() { 42 f := framework.NewDefaultFramework("mirror-pod-with-grace-period") 43 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 44 ginkgo.Context("when create a mirror pod ", func() { 45 var ns, podPath, staticPodName, mirrorPodName string 46 ginkgo.BeforeEach(func(ctx context.Context) { 47 ns = f.Namespace.Name 48 staticPodName = "graceful-pod-" + string(uuid.NewUUID()) 49 mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName 50 51 podPath = kubeletCfg.StaticPodPath 52 53 ginkgo.By("create the static pod") 54 err := createStaticPodWithGracePeriod(podPath, staticPodName, ns) 55 framework.ExpectNoError(err) 56 57 ginkgo.By("wait for the mirror pod to be running") 58 gomega.Eventually(ctx, func(ctx context.Context) error { 59 return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns) 60 }, 2*time.Minute, time.Second*4).Should(gomega.BeNil()) 61 }) 62 63 f.It("mirror pod termination should satisfy grace period when static pod is deleted", f.WithNodeConformance(), func(ctx context.Context) { 64 ginkgo.By("get mirror pod uid") 65 pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) 66 framework.ExpectNoError(err) 67 uid := pod.UID 68 69 ginkgo.By("delete the static pod") 70 file := staticPodPath(podPath, staticPodName, ns) 71 framework.Logf("deleting static pod manifest %q", file) 72 err = os.Remove(file) 73 framework.ExpectNoError(err) 74 75 ginkgo.By("wait for the mirror pod to be running for grace period") 76 gomega.Consistently(ctx, func(ctx context.Context) error { 77 return checkMirrorPodRunningWithUID(ctx, f.ClientSet, mirrorPodName, ns, uid) 78 }, 19*time.Second, 200*time.Millisecond).Should(gomega.BeNil()) 79 }) 80 81 f.It("mirror pod termination should satisfy grace period when static pod is updated", f.WithNodeConformance(), func(ctx context.Context) { 82 ginkgo.By("get mirror pod uid") 83 pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) 84 framework.ExpectNoError(err) 85 uid := pod.UID 86 87 ginkgo.By("update the static pod container image") 88 image := imageutils.GetPauseImageName() 89 err = createStaticPod(podPath, staticPodName, ns, image, v1.RestartPolicyAlways) 90 framework.ExpectNoError(err) 91 92 ginkgo.By("wait for the mirror pod to be running for grace period") 93 gomega.Consistently(ctx, func(ctx context.Context) error { 94 return checkMirrorPodRunningWithUID(ctx, f.ClientSet, mirrorPodName, ns, uid) 95 }, 19*time.Second, 200*time.Millisecond).Should(gomega.BeNil()) 96 97 ginkgo.By("wait for the mirror pod to be updated") 98 gomega.Eventually(ctx, func(ctx context.Context) error { 99 return checkMirrorPodRecreatedAndRunning(ctx, f.ClientSet, mirrorPodName, ns, uid) 100 }, 2*time.Minute, time.Second*4).Should(gomega.BeNil()) 101 102 ginkgo.By("check the mirror pod container image is updated") 103 pod, err = f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) 104 framework.ExpectNoError(err) 105 gomega.Expect(pod.Spec.Containers).To(gomega.HaveLen(1)) 106 gomega.Expect(pod.Spec.Containers[0].Image).To(gomega.Equal(image)) 107 }) 108 109 f.It("should update a static pod when the static pod is updated multiple times during the graceful termination period", f.WithNodeConformance(), func(ctx context.Context) { 110 ginkgo.By("get mirror pod uid") 111 pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) 112 framework.ExpectNoError(err) 113 uid := pod.UID 114 115 ginkgo.By("update the pod manifest multiple times during the graceful termination period") 116 for i := 0; i < 300; i++ { 117 err = createStaticPod(podPath, staticPodName, ns, 118 fmt.Sprintf("image-%d", i), v1.RestartPolicyAlways) 119 framework.ExpectNoError(err) 120 time.Sleep(100 * time.Millisecond) 121 } 122 image := imageutils.GetPauseImageName() 123 err = createStaticPod(podPath, staticPodName, ns, image, v1.RestartPolicyAlways) 124 framework.ExpectNoError(err) 125 126 ginkgo.By("wait for the mirror pod to be updated") 127 gomega.Eventually(ctx, func(ctx context.Context) error { 128 return checkMirrorPodRecreatedAndRunning(ctx, f.ClientSet, mirrorPodName, ns, uid) 129 }, 2*time.Minute, time.Second*4).Should(gomega.BeNil()) 130 131 ginkgo.By("check the mirror pod container image is updated") 132 pod, err = f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{}) 133 framework.ExpectNoError(err) 134 gomega.Expect(pod.Spec.Containers).To(gomega.HaveLen(1)) 135 gomega.Expect(pod.Spec.Containers[0].Image).To(gomega.Equal(image)) 136 }) 137 138 f.Context("and the container runtime is temporarily down during pod termination", f.WithNodeConformance(), f.WithSerial(), f.WithDisruptive(), func() { 139 ginkgo.BeforeEach(func(ctx context.Context) { 140 // Ensure that prior to the test starting, no other pods are running or in the process of being terminated other than the mirror pod. 141 // This is necessary as the test verifies metrics that assume that there is only one pod (the static pod) being run, and all other pods have been terminated. 142 gomega.Eventually(ctx, func(ctx context.Context) error { 143 podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{}) 144 if err != nil { 145 return fmt.Errorf("failed listing pods while waiting for all pods to be terminated: %v", err) 146 } 147 var remainingPods []string 148 149 for _, pod := range podList.Items { 150 // The mirror pod is the only expected pod to be running 151 if pod.Name == mirrorPodName && pod.Namespace == ns { 152 continue 153 } 154 remainingPods = append(remainingPods, fmt.Sprintf("(%s/%s)", pod.Namespace, pod.Name)) 155 } 156 157 if len(remainingPods) > 0 { 158 return fmt.Errorf("not all pods are terminated yet prior to starting mirror pod test: %v pods that still exist: %v", len(remainingPods), strings.Join(remainingPods, ",")) 159 } 160 return nil 161 }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed()) 162 }) 163 ginkgo.It("the mirror pod should terminate successfully", func(ctx context.Context) { 164 ginkgo.By("verifying the pod is described as syncing in metrics") 165 gomega.Eventually(ctx, getKubeletMetrics, 5*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{ 166 "kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 167 `kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0), 168 `kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(1), 169 `kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0), 170 `kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0), 171 `kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0), 172 `kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0), 173 `kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0), 174 `kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0), 175 `kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(0), 176 `kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0), 177 `kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0), 178 `kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0), 179 `kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0), 180 `kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0), 181 `kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0), 182 }), 183 "kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 184 `kubelet_mirror_pods`: timelessSample(1), 185 }), 186 "kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 187 `kubelet_active_pods{static=""}`: timelessSample(0), 188 `kubelet_active_pods{static="true"}`: timelessSample(1), 189 }), 190 "kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 191 `kubelet_desired_pods{static=""}`: timelessSample(0), 192 `kubelet_desired_pods{static="true"}`: timelessSample(1), 193 }), 194 })) 195 196 ginkgo.By("delete the static pod") 197 err := deleteStaticPod(podPath, staticPodName, ns) 198 framework.ExpectNoError(err) 199 200 // Note it is important we have a small delay here as we would like to reproduce https://issues.k8s.io/113091 which requires a failure in syncTerminatingPod() 201 // This requires waiting a small period between the static pod being deleted so that syncTerminatingPod() will attempt to run 202 ginkgo.By("sleeping before stopping the container runtime") 203 time.Sleep(2 * time.Second) 204 205 ginkgo.By("stop the container runtime") 206 err = stopContainerRuntime() 207 framework.ExpectNoError(err, "expected no error stopping the container runtime") 208 209 ginkgo.By("waiting for the container runtime to be stopped") 210 gomega.Eventually(ctx, func(ctx context.Context) error { 211 _, _, err := getCRIClient() 212 return err 213 }, 2*time.Minute, time.Second*5).ShouldNot(gomega.Succeed()) 214 215 ginkgo.By("verifying the mirror pod is running") 216 gomega.Consistently(ctx, func(ctx context.Context) error { 217 return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns) 218 }, 19*time.Second, 200*time.Millisecond).Should(gomega.BeNil()) 219 220 ginkgo.By("verifying the pod is described as terminating in metrics") 221 gomega.Eventually(ctx, getKubeletMetrics, 5*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{ 222 "kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 223 `kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0), 224 `kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(0), 225 `kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0), 226 `kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0), 227 `kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0), 228 `kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0), 229 `kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0), 230 `kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0), 231 `kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(1), 232 `kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0), 233 `kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0), 234 `kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0), 235 `kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0), 236 `kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0), 237 `kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0), 238 }), 239 "kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 240 `kubelet_mirror_pods`: timelessSample(1), 241 }), 242 "kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 243 `kubelet_active_pods{static=""}`: timelessSample(0), 244 // TODO: the pod is still running and consuming resources, it should be considered in 245 // admission https://github.com/kubernetes/kubernetes/issues/104824 for static pods at 246 // least, which means it should be 1 247 `kubelet_active_pods{static="true"}`: timelessSample(0), 248 }), 249 "kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 250 `kubelet_desired_pods{static=""}`: timelessSample(0), 251 `kubelet_desired_pods{static="true"}`: timelessSample(0), 252 })})) 253 254 ginkgo.By("start the container runtime") 255 err = startContainerRuntime() 256 framework.ExpectNoError(err, "expected no error starting the container runtime") 257 ginkgo.By("waiting for the container runtime to start") 258 gomega.Eventually(ctx, func(ctx context.Context) error { 259 r, _, err := getCRIClient() 260 if err != nil { 261 return fmt.Errorf("error getting CRI client: %w", err) 262 } 263 status, err := r.Status(ctx, true) 264 if err != nil { 265 return fmt.Errorf("error checking CRI status: %w", err) 266 } 267 framework.Logf("Runtime started: %#v", status) 268 return nil 269 }, 2*time.Minute, time.Second*5).Should(gomega.Succeed()) 270 271 ginkgo.By(fmt.Sprintf("verifying that the mirror pod (%s/%s) stops running after about 30s", ns, mirrorPodName)) 272 // from the time the container runtime starts, it should take a maximum of: 273 // 20s (grace period) + 2 sync transitions * 1s + 2s between housekeeping + 3s to detect CRI up + 274 // 2s overhead 275 // which we calculate here as "about 30s", so we try a bit longer than that but verify that it is 276 // tightly bounded by not waiting longer (we want to catch regressions to shutdown) 277 time.Sleep(30 * time.Second) 278 gomega.Eventually(ctx, func(ctx context.Context) error { 279 return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns) 280 }, time.Second*3, time.Second).Should(gomega.Succeed()) 281 282 ginkgo.By("verifying the pod finishes terminating and is removed from metrics") 283 gomega.Eventually(ctx, getKubeletMetrics, 15*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{ 284 "kubelet_working_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 285 `kubelet_working_pods{config="desired", lifecycle="sync", static=""}`: timelessSample(0), 286 `kubelet_working_pods{config="desired", lifecycle="sync", static="true"}`: timelessSample(0), 287 `kubelet_working_pods{config="orphan", lifecycle="sync", static=""}`: timelessSample(0), 288 `kubelet_working_pods{config="orphan", lifecycle="sync", static="true"}`: timelessSample(0), 289 `kubelet_working_pods{config="runtime_only", lifecycle="sync", static="unknown"}`: timelessSample(0), 290 `kubelet_working_pods{config="desired", lifecycle="terminating", static=""}`: timelessSample(0), 291 `kubelet_working_pods{config="desired", lifecycle="terminating", static="true"}`: timelessSample(0), 292 `kubelet_working_pods{config="orphan", lifecycle="terminating", static=""}`: timelessSample(0), 293 `kubelet_working_pods{config="orphan", lifecycle="terminating", static="true"}`: timelessSample(0), 294 `kubelet_working_pods{config="runtime_only", lifecycle="terminating", static="unknown"}`: timelessSample(0), 295 `kubelet_working_pods{config="desired", lifecycle="terminated", static=""}`: timelessSample(0), 296 `kubelet_working_pods{config="desired", lifecycle="terminated", static="true"}`: timelessSample(0), 297 `kubelet_working_pods{config="orphan", lifecycle="terminated", static=""}`: timelessSample(0), 298 `kubelet_working_pods{config="orphan", lifecycle="terminated", static="true"}`: timelessSample(0), 299 `kubelet_working_pods{config="runtime_only", lifecycle="terminated", static="unknown"}`: timelessSample(0), 300 }), 301 "kubelet_mirror_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 302 `kubelet_mirror_pods`: timelessSample(0), 303 }), 304 "kubelet_active_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 305 `kubelet_active_pods{static=""}`: timelessSample(0), 306 `kubelet_active_pods{static="true"}`: timelessSample(0), 307 }), 308 "kubelet_desired_pods": gstruct.MatchElements(sampleLabelID, 0, gstruct.Elements{ 309 `kubelet_desired_pods{static=""}`: timelessSample(0), 310 `kubelet_desired_pods{static="true"}`: timelessSample(0), 311 }), 312 })) 313 }) 314 315 ginkgo.AfterEach(func(ctx context.Context) { 316 ginkgo.By("starting the container runtime") 317 err := startContainerRuntime() 318 framework.ExpectNoError(err, "expected no error starting the container runtime") 319 ginkgo.By("waiting for the container runtime to start") 320 gomega.Eventually(ctx, func(ctx context.Context) error { 321 _, _, err := getCRIClient() 322 if err != nil { 323 return fmt.Errorf("error getting cri client: %v", err) 324 } 325 return nil 326 }, 2*time.Minute, time.Second*5).Should(gomega.Succeed()) 327 }) 328 }) 329 330 ginkgo.AfterEach(func(ctx context.Context) { 331 ginkgo.By("delete the static pod") 332 err := deleteStaticPod(podPath, staticPodName, ns) 333 if !os.IsNotExist(err) { 334 framework.ExpectNoError(err) 335 } 336 337 ginkgo.By("wait for the mirror pod to disappear") 338 gomega.Eventually(ctx, func(ctx context.Context) error { 339 return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns) 340 }, 2*time.Minute, time.Second*4).Should(gomega.BeNil()) 341 }) 342 }) 343 }) 344 345 func createStaticPodWithGracePeriod(dir, name, namespace string) error { 346 template := ` 347 apiVersion: v1 348 kind: Pod 349 metadata: 350 name: %s 351 namespace: %s 352 spec: 353 terminationGracePeriodSeconds: 20 354 containers: 355 - name: m-test 356 image: %s 357 command: 358 - /bin/sh 359 args: 360 - '-c' 361 - | 362 _term() { 363 echo "Caught SIGTERM signal!" 364 sleep 100 365 } 366 trap _term SIGTERM 367 sleep 1000 368 ` 369 file := staticPodPath(dir, name, namespace) 370 podYaml := fmt.Sprintf(template, name, namespace, imageutils.GetE2EImage(imageutils.BusyBox)) 371 372 f, err := os.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0666) 373 if err != nil { 374 return err 375 } 376 defer f.Close() 377 378 _, err = f.WriteString(podYaml) 379 framework.Logf("has written %v", file) 380 return err 381 } 382 383 func checkMirrorPodRunningWithUID(ctx context.Context, cl clientset.Interface, name, namespace string, oUID types.UID) error { 384 pod, err := cl.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) 385 if err != nil { 386 return fmt.Errorf("expected the mirror pod %q to appear: %w", name, err) 387 } 388 if pod.UID != oUID { 389 return fmt.Errorf("expected the uid of mirror pod %q to be same, got %q", name, pod.UID) 390 } 391 if pod.Status.Phase != v1.PodRunning { 392 return fmt.Errorf("expected the mirror pod %q to be running, got %q", name, pod.Status.Phase) 393 } 394 return nil 395 } 396 397 func sampleLabelID(element interface{}) string { 398 el := element.(*model.Sample) 399 return el.Metric.String() 400 }