k8s.io/kubernetes@v1.29.3/test/e2e_node/garbage_collector_test.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"strconv"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubelet/pkg/types"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

const (
	// TODO (dashpole): Once dynamic config is possible, test different values for maxPerPodContainer and maxTotalContainers.
	// Currently using default values for maxPerPodContainer and maxTotalContainers.
	maxPerPodContainer = 1
	maxTotalContainers = -1

	garbageCollectDuration = 3 * time.Minute
	setupDuration          = 10 * time.Minute
	runtimePollInterval    = 10 * time.Second
)

type testPodSpec struct {
	podName string
	// containerPrefix must be unique for each pod, and cannot end in a number.
	// containerPrefix is used to identify which containers belong to which pod in the test.
	containerPrefix string
	// the number of times each container should restart
	restartCount int32
	// the number of containers in the test pod
	numContainers int
	// a function that returns the names of the containers currently on the node (including dead containers)
	getContainerNames func() ([]string, error)
}

func (pod *testPodSpec) getContainerName(containerNumber int) string {
	return fmt.Sprintf("%s%d", pod.containerPrefix, containerNumber)
}

type testRun struct {
	// Name for logging purposes
	testName string
	// Pod specs for the test
	testPods []*testPodSpec
}

// GarbageCollect tests that the Kubelet conforms to the Kubelet Garbage Collection Policy, found here:
// http://kubernetes.io/docs/admin/garbage-collection/
var _ = SIGDescribe("GarbageCollect", framework.WithSerial(), nodefeature.GarbageCollect, func() {
	f := framework.NewDefaultFramework("garbage-collect-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	containerNamePrefix := "gc-test-container-"
	podNamePrefix := "gc-test-pod-"

	// These suffixes are appended to pod and container names.
	// They differentiate pods from one another, and allow filtering
	// by name to identify which containers belong to which pods.
	// They must be unique, and must not end in a number.
	firstSuffix := "one-container-no-restarts"
	secondSuffix := "many-containers-many-restarts-one-pod"
	thirdSuffix := "many-containers-many-restarts-"
	tests := []testRun{
		{
			testName: "One Non-restarting Container",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + firstSuffix,
					containerPrefix: containerNamePrefix + firstSuffix,
					restartCount:    0,
					numContainers:   1,
				},
			},
		},
		{
			testName: "Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + secondSuffix,
					containerPrefix: containerNamePrefix + secondSuffix,
					restartCount:    4,
					numContainers:   4,
				},
			},
		},
		{
			testName: "Many Pods with Many Restarting Containers",
			testPods: []*testPodSpec{
				{
					podName:         podNamePrefix + thirdSuffix + "one",
					containerPrefix: containerNamePrefix + thirdSuffix + "one",
					restartCount:    3,
					numContainers:   4,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "two",
					containerPrefix: containerNamePrefix + thirdSuffix + "two",
					restartCount:    2,
					numContainers:   6,
				},
				{
					podName:         podNamePrefix + thirdSuffix + "three",
					containerPrefix: containerNamePrefix + thirdSuffix + "three",
					restartCount:    3,
					numContainers:   5,
				},
			},
		},
	}
	for _, test := range tests {
		containerGCTest(f, test)
	}
})

// Tests the following:
//
//	pods are created, and all containers restart the specified number of times
//	while containers are running, the number of dead copies of a single container does not exceed maxPerPodContainer
//	while containers are running, the total number of containers does not exceed maxTotalContainers
//	while containers are running, if not constrained by maxPerPodContainer or maxTotalContainers, keep an extra copy of each container
//	once pods are killed, all containers are eventually cleaned up
func containerGCTest(f *framework.Framework, test testRun) {
	var runtime internalapi.RuntimeService
	ginkgo.BeforeEach(func() {
		var err error
		runtime, _, err = getCRIClient()
		framework.ExpectNoError(err)
	})
	for _, pod := range test.testPods {
		pod := pod // shadow the loop variable so each closure captures its own pod (needed under pre-Go 1.22 loop semantics)
		// Initialize the getContainerNames function to use the CRI runtime client.
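		// The kubelet labels every container it creates through the CRI with its
		// owning pod's name and namespace and with its own container name, so
		// filtering on those labels returns one entry per container instance on
		// the node, whether running or dead.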
		pod.getContainerNames = func() ([]string, error) {
			relevantContainers := []string{}
			containers, err := runtime.ListContainers(context.Background(), &runtimeapi.ContainerFilter{
				LabelSelector: map[string]string{
					types.KubernetesPodNameLabel:      pod.podName,
					types.KubernetesPodNamespaceLabel: f.Namespace.Name,
				},
			})
			if err != nil {
				return relevantContainers, err
			}
			for _, container := range containers {
				relevantContainers = append(relevantContainers, container.Labels[types.KubernetesContainerNameLabel])
			}
			return relevantContainers, nil
		}
	}

	ginkgo.Context(fmt.Sprintf("Garbage Collection Test: %s", test.testName), func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			realPods := getPods(test.testPods)
			e2epod.NewPodClient(f).CreateBatch(ctx, realPods)
			ginkgo.By("Making sure all containers restart the specified number of times")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				for _, podSpec := range test.testPods {
					err := verifyPodRestartCount(ctx, f, podSpec.podName, podSpec.numContainers, podSpec.restartCount)
					if err != nil {
						return err
					}
				}
				return nil
			}, setupDuration, runtimePollInterval).Should(gomega.BeNil())
		})

		ginkgo.It("Should eventually garbage collect containers when we exceed the number of dead containers per container", func(ctx context.Context) {
			totalContainers := 0
			for _, pod := range test.testPods {
				totalContainers += pod.numContainers*2 + 1
			}
			gomega.Eventually(ctx, func() error {
				total := 0
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					total += len(containerNames)
					// Check maxPerPodContainer for each container in the pod:
					// allow maxPerPodContainer dead copies plus the one running copy.
					for i := 0; i < pod.numContainers; i++ {
						containerCount := 0
						for _, containerName := range containerNames {
							if containerName == pod.getContainerName(i) {
								containerCount++
							}
						}
						if containerCount > maxPerPodContainer+1 {
							return fmt.Errorf("expected number of copies of container: %s, to be <= maxPerPodContainer+1: %d; list of containers: %v",
								pod.getContainerName(i), maxPerPodContainer+1, containerNames)
						}
					}
				}
				// Check maxTotalContainers. Currently, the default is -1, so this
				// will never happen until we can configure maxTotalContainers.
				if maxTotalContainers > 0 && totalContainers <= maxTotalContainers && total > maxTotalContainers {
					return fmt.Errorf("expected total number of containers: %v, to be <= maxTotalContainers: %v", total, maxTotalContainers)
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())

			if maxPerPodContainer >= 2 && maxTotalContainers < 0 { // make sure constraints wouldn't make us gc old containers
				ginkgo.By("Making sure the kubelet consistently keeps around an extra copy of each container.")
				gomega.Consistently(ctx, func() error {
					for _, pod := range test.testPods {
						containerNames, err := pod.getContainerNames()
						if err != nil {
							return err
						}
						for i := 0; i < pod.numContainers; i++ {
							containerCount := 0
							for _, containerName := range containerNames {
								if containerName == pod.getContainerName(i) {
									containerCount++
								}
							}
							if pod.restartCount > 0 && containerCount < maxPerPodContainer+1 {
								return fmt.Errorf("expected pod %v to have extra copies of old containers", pod.podName)
							}
						}
					}
					return nil
				}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())
			}
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			for _, pod := range test.testPods {
				ginkgo.By(fmt.Sprintf("Deleting Pod %v", pod.podName))
				e2epod.NewPodClient(f).DeleteSync(ctx, pod.podName, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout)
			}

			ginkgo.By("Making sure all containers get cleaned up")
			gomega.Eventually(ctx, func() error {
				for _, pod := range test.testPods {
					containerNames, err := pod.getContainerNames()
					if err != nil {
						return err
					}
					if len(containerNames) > 0 {
						return fmt.Errorf("%v containers still remain", containerNames)
					}
				}
				return nil
			}, garbageCollectDuration, runtimePollInterval).Should(gomega.BeNil())

			if ginkgo.CurrentSpecReport().Failed() && framework.TestContext.DumpLogsOnFailure {
				logNodeEvents(ctx, f)
				logPodEvents(ctx, f)
			}
		})
	})
}

func getPods(specs []*testPodSpec) (pods []*v1.Pod) {
	for _, spec := range specs {
		ginkgo.By(fmt.Sprintf("Creating %v containers with restartCount: %v", spec.numContainers, spec.restartCount))
		containers := []v1.Container{}
		for i := 0; i < spec.numContainers; i++ {
			containers = append(containers, v1.Container{
				Image:   busyboxImage,
				Name:    spec.getContainerName(i),
				Command: getRestartingContainerCommand("/test-empty-dir-mnt", i, spec.restartCount, ""),
				VolumeMounts: []v1.VolumeMount{
					{MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"},
				},
			})
		}
		pods = append(pods, &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{Name: spec.podName},
			Spec: v1.PodSpec{
				RestartPolicy: v1.RestartPolicyAlways,
				Containers:    containers,
				Volumes: []v1.Volume{
					{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
				},
			},
		})
	}
	return
}
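// getRestartingContainerCommand returns a shell command that appends a line to
// a count file on the pod's emptyDir volume each time the container starts.
// While the file has fewer than restarts+1 lines the container exits immediately,
// so RestartPolicyAlways restarts it; once the file reaches restarts+1 lines it
// sleeps forever. Each container therefore restarts exactly `restarts` times,
// leaving behind dead container instances for the kubelet to garbage collect.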
func getRestartingContainerCommand(path string, containerNum int, restarts int32, loopingCommand string) []string {
	return []string{
		"sh",
		"-c",
		fmt.Sprintf(`
f=%s/countfile%s
count=$(echo 'hello' >> $f ; wc -l $f | awk '{print $1}')
if [ $count -lt %d ]; then
	exit 0
fi
while true; do %s sleep 1; done`,
			path, strconv.Itoa(containerNum), restarts+1, loopingCommand),
	}
}

func verifyPodRestartCount(ctx context.Context, f *framework.Framework, podName string, expectedNumContainers int, expectedRestartCount int32) error {
	updatedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, podName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if len(updatedPod.Status.ContainerStatuses) != expectedNumContainers {
		return fmt.Errorf("expected pod %s to have %d containers, actual: %d",
			updatedPod.Name, expectedNumContainers, len(updatedPod.Status.ContainerStatuses))
	}
	for _, containerStatus := range updatedPod.Status.ContainerStatuses {
		if containerStatus.RestartCount != expectedRestartCount {
			return fmt.Errorf("pod %s had container with restartcount %d. Should have been exactly %d",
				updatedPod.Name, containerStatus.RestartCount, expectedRestartCount)
		}
	}
	return nil
}