k8s.io/kubernetes@v1.29.3/test/e2e_node/unknown_pods_test.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"context"
    21  	"os"
    22  
    23  	"github.com/onsi/ginkgo/v2"
    24  	"github.com/onsi/gomega"
    25  	v1 "k8s.io/api/core/v1"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/util/uuid"
    28  	"k8s.io/kubernetes/test/e2e/framework"
    29  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    30  	imageutils "k8s.io/kubernetes/test/utils/image"
    31  	admissionapi "k8s.io/pod-security-admission/api"
    32  )
    33  
    34  /*
    35  * Unknown pods are pods which are unknown pods to the kubelet, but are still
    36  * running in the container runtime. If kubelet detects a pod which is not in
    37  * the config (i.e. not present in API-server or static pod), but running as
    38  * detected in container runtime, kubelet should aggressively terminate the pod.
    39  *
    40  * This situation can be encountered if a pod is running, then kubelet is
    41  * stopped, and while stopped, the manifest is deleted (by force deleting the
    42  * API pod or deleting the static pod manifest), and then restarting the
    43  * kubelet. Upon restart, kubelet will see the pod as running via the container
    44  * runtime, but it will not be present in the config, thus making the pod a
    45  * "unknown pod". Kubelet should then proceed to terminate these unknown pods.
    46   */
    47  var _ = SIGDescribe("Unknown Pods", framework.WithSerial(), framework.WithDisruptive(), func() {
    48  	f := framework.NewDefaultFramework("unknown-pods")
    49  	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
    50  
    51  	ginkgo.Context("when creating a mirror pod", func() {
    52  		var ns, podPath, staticPodName, mirrorPodName string
    53  		ginkgo.BeforeEach(func(ctx context.Context) {
    54  			ns = f.Namespace.Name
    55  			staticPodName = "unknown-test-pod-" + string(uuid.NewUUID())
    56  			mirrorPodName = staticPodName + "-" + framework.TestContext.NodeName
    57  
    58  			podPath = kubeletCfg.StaticPodPath
    59  
    60  			framework.Logf("create the static pod %v", staticPodName)
    61  			err := createStaticPodWithGracePeriod(podPath, staticPodName, ns)
    62  			framework.ExpectNoError(err)
    63  
    64  			framework.Logf("wait for the mirror pod %v to be running", mirrorPodName)
    65  			gomega.Eventually(ctx, func(ctx context.Context) error {
    66  				return checkMirrorPodRunning(ctx, f.ClientSet, mirrorPodName, ns)
    67  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeNil())
    68  		})
    69  
    70  		ginkgo.It("the static pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
    71  			framework.Logf("Stopping the kubelet")
    72  			startKubelet := stopKubelet()
    73  
    74  			pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, mirrorPodName, metav1.GetOptions{})
    75  			framework.ExpectNoError(err)
    76  
    77  			// wait until the kubelet health check will fail
    78  			gomega.Eventually(ctx, func() bool {
    79  				return kubeletHealthCheck(kubeletHealthCheckURL)
    80  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
    81  
    82  			framework.Logf("Delete the static pod manifest while the kubelet is not running")
    83  			file := staticPodPath(podPath, staticPodName, ns)
    84  			framework.Logf("deleting static pod manifest %q", file)
    85  			err = os.Remove(file)
    86  			framework.ExpectNoError(err)
    87  
    88  			framework.Logf("Starting the kubelet")
    89  			startKubelet()
    90  
    91  			// wait until the kubelet health check will succeed
    92  			gomega.Eventually(ctx, func() bool {
    93  				return kubeletHealthCheck(kubeletHealthCheckURL)
    94  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
    95  
    96  			framework.Logf("wait for the mirror pod %v to disappear", mirrorPodName)
    97  			gomega.Eventually(ctx, func(ctx context.Context) error {
    98  				return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
    99  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   100  
   101  			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
   102  		})
   103  
   104  		ginkgo.AfterEach(func(ctx context.Context) {
   105  			framework.Logf("deleting the static pod %v", staticPodName)
   106  			err := deleteStaticPod(podPath, staticPodName, ns)
   107  			if !os.IsNotExist(err) {
   108  				framework.ExpectNoError(err)
   109  			}
   110  
   111  			framework.Logf("wait for the mirror pod to disappear")
   112  			gomega.Eventually(ctx, func(ctx context.Context) error {
   113  				return checkMirrorPodDisappear(ctx, f.ClientSet, mirrorPodName, ns)
   114  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   115  		})
   116  	})
   117  
   118  	ginkgo.Context("when creating a API pod", func() {
   119  		var ns, podName string
   120  
   121  		ginkgo.BeforeEach(func(ctx context.Context) {
   122  			ns = f.Namespace.Name
   123  			podName = "unknown-test-pause-pod-" + string(uuid.NewUUID())
   124  			pod := &v1.Pod{
   125  				ObjectMeta: metav1.ObjectMeta{
   126  					Name: podName,
   127  				},
   128  				Spec: v1.PodSpec{
   129  					Containers: []v1.Container{
   130  						{
   131  							Name:  "pause",
   132  							Image: imageutils.GetPauseImageName(),
   133  						},
   134  					},
   135  				},
   136  			}
   137  
   138  			e2epod.NewPodClient(f).CreateSync(ctx, pod)
   139  		})
   140  
   141  		ginkgo.It("the api pod should be terminated and cleaned up due to becoming a unknown pod due to being force deleted while kubelet is not running", func(ctx context.Context) {
   142  			framework.Logf("Stopping the kubelet")
   143  			startKubelet := stopKubelet()
   144  
   145  			pod, err := f.ClientSet.CoreV1().Pods(ns).Get(ctx, podName, metav1.GetOptions{})
   146  			framework.ExpectNoError(err)
   147  
   148  			// wait until the kubelet health check will fail
   149  			gomega.Eventually(ctx, func() bool {
   150  				return kubeletHealthCheck(kubeletHealthCheckURL)
   151  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalse())
   152  
   153  			framework.Logf("Delete the pod while the kubelet is not running")
   154  			// Delete pod sync by name will force delete the pod, removing it from kubelet's config
   155  			deletePodSyncByName(ctx, f, podName)
   156  
   157  			framework.Logf("Starting the kubelet")
   158  			startKubelet()
   159  
   160  			// wait until the kubelet health check will succeed
   161  			gomega.Eventually(ctx, func() bool {
   162  				return kubeletHealthCheck(kubeletHealthCheckURL)
   163  			}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrue())
   164  
   165  			framework.Logf("wait for the pod %v to disappear", podName)
   166  			gomega.Eventually(ctx, func(ctx context.Context) error {
   167  				return checkMirrorPodDisappear(ctx, f.ClientSet, podName, ns)
   168  			}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())
   169  
   170  			waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
   171  		})
   172  	})
   173  })