k8s.io/kubernetes@v1.29.3/test/integration/podgc/podgc_test.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package podgc 18 19 import ( 20 "testing" 21 "time" 22 23 "github.com/google/go-cmp/cmp" 24 "github.com/google/go-cmp/cmp/cmpopts" 25 v1 "k8s.io/api/core/v1" 26 apierrors "k8s.io/apimachinery/pkg/api/errors" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/util/wait" 29 utilfeature "k8s.io/apiserver/pkg/util/feature" 30 "k8s.io/client-go/informers" 31 featuregatetesting "k8s.io/component-base/featuregate/testing" 32 "k8s.io/klog/v2" 33 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 34 "k8s.io/kubernetes/pkg/controller/podgc" 35 "k8s.io/kubernetes/pkg/features" 36 testutils "k8s.io/kubernetes/test/integration/util" 37 "k8s.io/utils/ptr" 38 ) 39 40 // TestPodGcOrphanedPodsWithFinalizer tests deletion of orphaned pods 41 func TestPodGcOrphanedPodsWithFinalizer(t *testing.T) { 42 tests := map[string]struct { 43 enablePodDisruptionConditions bool 44 enableJobPodReplacementPolicy bool 45 phase v1.PodPhase 46 wantPhase v1.PodPhase 47 wantDisruptionTarget *v1.PodCondition 48 }{ 49 "PodDisruptionConditions enabled": { 50 enablePodDisruptionConditions: true, 51 phase: v1.PodPending, 52 wantPhase: v1.PodFailed, 53 wantDisruptionTarget: &v1.PodCondition{ 54 Type: v1.DisruptionTarget, 55 Status: v1.ConditionTrue, 56 Reason: "DeletionByPodGC", 57 Message: "PodGC: node no longer exists", 58 }, 59 }, 60 "PodDisruptionConditions and PodReplacementPolicy enabled": { 61 enablePodDisruptionConditions: true, 62 enableJobPodReplacementPolicy: true, 63 phase: v1.PodPending, 64 wantPhase: v1.PodFailed, 65 wantDisruptionTarget: &v1.PodCondition{ 66 Type: v1.DisruptionTarget, 67 Status: v1.ConditionTrue, 68 Reason: "DeletionByPodGC", 69 Message: "PodGC: node no longer exists", 70 }, 71 }, 72 "Only PodReplacementPolicy enabled; no PodDisruptionCondition": { 73 enablePodDisruptionConditions: false, 74 enableJobPodReplacementPolicy: true, 75 phase: v1.PodPending, 76 wantPhase: v1.PodFailed, 77 }, 78 "PodDisruptionConditions disabled": { 79 enablePodDisruptionConditions: false, 80 phase: v1.PodPending, 81 wantPhase: v1.PodPending, 82 }, 83 "PodDisruptionConditions enabled; succeeded pod": { 84 enablePodDisruptionConditions: true, 85 phase: v1.PodSucceeded, 86 wantPhase: v1.PodSucceeded, 87 }, 88 "PodDisruptionConditions enabled; failed pod": { 89 enablePodDisruptionConditions: true, 90 phase: v1.PodFailed, 91 wantPhase: v1.PodFailed, 92 }, 93 } 94 95 for name, test := range tests { 96 t.Run(name, func(t *testing.T) { 97 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)() 98 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)() 99 testCtx := setup(t, "podgc-orphaned") 100 cs := testCtx.ClientSet 101 102 node := &v1.Node{ 103 ObjectMeta: metav1.ObjectMeta{ 104 Name: "node", 105 }, 106 Spec: v1.NodeSpec{}, 107 Status: v1.NodeStatus{ 108 Conditions: []v1.NodeCondition{ 109 { 110 Type: v1.NodeReady, 111 Status: v1.ConditionTrue, 112 }, 113 }, 114 }, 115 } 116 node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}) 117 if err != nil { 118 t.Fatalf("Failed to create node '%v', err: %v", node.Name, err) 119 } 120 121 pod := &v1.Pod{ 122 ObjectMeta: metav1.ObjectMeta{ 123 Name: "testpod", 124 Namespace: testCtx.NS.Name, 125 Finalizers: []string{"test.k8s.io/finalizer"}, 126 }, 127 Spec: v1.PodSpec{ 128 NodeName: node.Name, 129 Containers: []v1.Container{ 130 {Name: "foo", Image: "bar"}, 131 }, 132 }, 133 } 134 135 pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}) 136 if err != nil { 137 t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod)) 138 } 139 defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod) 140 141 pod.Status.Phase = test.phase 142 if _, err := testCtx.ClientSet.CoreV1().Pods(testCtx.NS.Name).UpdateStatus(testCtx.Ctx, pod, metav1.UpdateOptions{}); err != nil { 143 t.Fatalf("Error %v, while setting phase %v for pod: %v", err, test.phase, klog.KObj(pod)) 144 } 145 146 // we delete the node to orphan the pod 147 err = cs.CoreV1().Nodes().Delete(testCtx.Ctx, pod.Spec.NodeName, metav1.DeleteOptions{}) 148 if err != nil { 149 t.Fatalf("Failed to delete node: %v, err: %v", pod.Spec.NodeName, err) 150 } 151 err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name)) 152 if err != nil { 153 t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod)) 154 } 155 pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) 156 if err != nil { 157 t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod)) 158 } 159 _, gotDisruptionTarget := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) 160 if diff := cmp.Diff(test.wantDisruptionTarget, gotDisruptionTarget, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" { 161 t.Errorf("Pod %v has unexpected DisruptionTarget condition: %s", klog.KObj(pod), diff) 162 } 163 if pod.Status.Phase != test.wantPhase { 164 t.Errorf("Unexpected phase for pod %q. Got: %q, want: %q", klog.KObj(pod), pod.Status.Phase, test.wantPhase) 165 } 166 }) 167 } 168 } 169 170 // TestTerminatingOnOutOfServiceNode tests deletion pods terminating on out-of-service nodes 171 func TestTerminatingOnOutOfServiceNode(t *testing.T) { 172 tests := map[string]struct { 173 enablePodDisruptionConditions bool 174 enableJobPodReplacementPolicy bool 175 withFinalizer bool 176 wantPhase v1.PodPhase 177 }{ 178 "pod has phase changed to Failed when PodDisruptionConditions enabled": { 179 enablePodDisruptionConditions: true, 180 withFinalizer: true, 181 wantPhase: v1.PodFailed, 182 }, 183 "pod has phase unchanged when PodDisruptionConditions disabled": { 184 enablePodDisruptionConditions: false, 185 withFinalizer: true, 186 wantPhase: v1.PodPending, 187 }, 188 "pod is getting deleted when no finalizer and PodDisruptionConditions enabled": { 189 enablePodDisruptionConditions: true, 190 withFinalizer: false, 191 }, 192 "pod is getting deleted when no finalizer and PodDisruptionConditions disabled": { 193 enablePodDisruptionConditions: false, 194 withFinalizer: false, 195 }, 196 "pod has phase changed when PodDisruptionConditions disabled, but JobPodReplacementPolicy enabled": { 197 enablePodDisruptionConditions: false, 198 enableJobPodReplacementPolicy: true, 199 withFinalizer: true, 200 wantPhase: v1.PodFailed, 201 }, 202 } 203 204 for name, test := range tests { 205 t.Run(name, func(t *testing.T) { 206 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)() 207 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeOutOfServiceVolumeDetach, true)() 208 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.JobPodReplacementPolicy, test.enableJobPodReplacementPolicy)() 209 testCtx := setup(t, "podgc-out-of-service") 210 cs := testCtx.ClientSet 211 212 node := &v1.Node{ 213 ObjectMeta: metav1.ObjectMeta{ 214 Name: "node", 215 }, 216 Spec: v1.NodeSpec{}, 217 Status: v1.NodeStatus{ 218 Conditions: []v1.NodeCondition{ 219 { 220 Type: v1.NodeReady, 221 Status: v1.ConditionFalse, 222 }, 223 }, 224 }, 225 } 226 node, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}) 227 if err != nil { 228 t.Fatalf("Failed to create node '%v', err: %v", node.Name, err) 229 } 230 231 pod := &v1.Pod{ 232 ObjectMeta: metav1.ObjectMeta{ 233 Name: "testpod", 234 Namespace: testCtx.NS.Name, 235 }, 236 Spec: v1.PodSpec{ 237 NodeName: node.Name, 238 Containers: []v1.Container{ 239 {Name: "foo", Image: "bar"}, 240 }, 241 }, 242 Status: v1.PodStatus{ 243 Phase: v1.PodRunning, 244 }, 245 } 246 if test.withFinalizer { 247 pod.ObjectMeta.Finalizers = []string{"test.k8s.io/finalizer"} 248 } 249 250 pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}) 251 if err != nil { 252 t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod)) 253 } 254 if test.withFinalizer { 255 defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod) 256 } 257 258 // trigger termination of the pod, but with long grace period so that it is not removed immediately 259 err = cs.CoreV1().Pods(testCtx.NS.Name).Delete(testCtx.Ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: ptr.To[int64](300)}) 260 if err != nil { 261 t.Fatalf("Error: '%v' while deleting pod: '%v'", err, klog.KObj(pod)) 262 } 263 // wait until the pod is terminating 264 err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name)) 265 if err != nil { 266 t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod)) 267 } 268 // taint the node with the out-of-service taint 269 err = testutils.AddTaintToNode(cs, pod.Spec.NodeName, v1.Taint{Key: v1.TaintNodeOutOfService, Value: "", Effect: v1.TaintEffectNoExecute}) 270 if err != nil { 271 t.Fatalf("Failed to taint node: %v, err: %v", pod.Spec.NodeName, err) 272 } 273 if test.withFinalizer { 274 // wait until the pod phase is set as expected 275 err = wait.Poll(time.Second, time.Second*15, func() (bool, error) { 276 var e error 277 pod, e = cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) 278 if e != nil { 279 return true, e 280 } 281 return test.wantPhase == pod.Status.Phase, nil 282 }) 283 if err != nil { 284 t.Errorf("Error %q while waiting for the pod %q to be in expected phase", err, klog.KObj(pod)) 285 } 286 _, cond := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) 287 if cond != nil { 288 t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(pod), v1.DisruptionTarget) 289 } 290 } else { 291 // wait until the pod is deleted 292 err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) { 293 var e error 294 pod, e = cs.CoreV1().Pods(pod.Namespace).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) 295 if e == nil { 296 return pod == nil, nil 297 } 298 // there was an error 299 if apierrors.IsNotFound(e) { 300 return true, nil 301 } 302 return false, e 303 }) 304 if err != nil { 305 t.Errorf("Error %q while waiting for the pod %q to be deleted", err, klog.KObj(pod)) 306 } 307 } 308 }) 309 } 310 } 311 312 // TestPodGcForPodsWithDuplicatedFieldKeys regression test for https://issues.k8s.io/118261 313 func TestPodGcForPodsWithDuplicatedFieldKeys(t *testing.T) { 314 tests := map[string]struct { 315 pod *v1.Pod 316 wantDisruptionTarget *v1.PodCondition 317 }{ 318 "Orphan pod with duplicated env vars": { 319 pod: &v1.Pod{ 320 ObjectMeta: metav1.ObjectMeta{ 321 Name: "testpod", 322 Finalizers: []string{"test.k8s.io/finalizer"}, 323 }, 324 Spec: v1.PodSpec{ 325 NodeName: "non-existing-node", 326 Containers: []v1.Container{ 327 { 328 Name: "foo", 329 Image: "bar", 330 Env: []v1.EnvVar{ 331 { 332 Name: "XYZ", 333 Value: "1", 334 }, 335 { 336 Name: "XYZ", 337 Value: "2", 338 }, 339 }, 340 }, 341 }, 342 }, 343 }, 344 wantDisruptionTarget: &v1.PodCondition{ 345 Type: v1.DisruptionTarget, 346 Status: v1.ConditionTrue, 347 Reason: "DeletionByPodGC", 348 Message: "PodGC: node no longer exists", 349 }, 350 }, 351 "Orphan pod with duplicated ports; scenario from https://issues.k8s.io/113482": { 352 pod: &v1.Pod{ 353 ObjectMeta: metav1.ObjectMeta{ 354 Name: "testpod", 355 Finalizers: []string{"test.k8s.io/finalizer"}, 356 }, 357 Spec: v1.PodSpec{ 358 NodeName: "non-existing-node", 359 Containers: []v1.Container{ 360 { 361 Name: "foo", 362 Image: "bar", 363 Ports: []v1.ContainerPort{ 364 { 365 ContainerPort: 93, 366 HostPort: 9376, 367 }, 368 { 369 ContainerPort: 93, 370 HostPort: 9377, 371 }, 372 }, 373 }, 374 }, 375 }, 376 }, 377 wantDisruptionTarget: &v1.PodCondition{ 378 Type: v1.DisruptionTarget, 379 Status: v1.ConditionTrue, 380 Reason: "DeletionByPodGC", 381 Message: "PodGC: node no longer exists", 382 }, 383 }, 384 } 385 386 for name, test := range tests { 387 t.Run(name, func(t *testing.T) { 388 defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)() 389 testCtx := setup(t, "podgc-orphaned") 390 cs := testCtx.ClientSet 391 392 pod := test.pod 393 pod.Namespace = testCtx.NS.Namespace 394 pod, err := cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, pod, metav1.CreateOptions{}) 395 if err != nil { 396 t.Fatalf("Error %v, while creating pod: %v", err, klog.KObj(pod)) 397 } 398 defer testutils.RemovePodFinalizers(testCtx.Ctx, testCtx.ClientSet, t, *pod) 399 400 // getting evicted due to NodeName being "non-existing-node" 401 err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*15, true, testutils.PodIsGettingEvicted(cs, pod.Namespace, pod.Name)) 402 if err != nil { 403 t.Fatalf("Error '%v' while waiting for the pod '%v' to be terminating", err, klog.KObj(pod)) 404 } 405 pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, pod.Name, metav1.GetOptions{}) 406 if err != nil { 407 t.Fatalf("Error: '%v' while updating pod info: '%v'", err, klog.KObj(pod)) 408 } 409 _, gotDisruptionTarget := podutil.GetPodCondition(&pod.Status, v1.DisruptionTarget) 410 if diff := cmp.Diff(test.wantDisruptionTarget, gotDisruptionTarget, cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" { 411 t.Errorf("Pod %v has unexpected DisruptionTarget condition: %s", klog.KObj(pod), diff) 412 } 413 if gotDisruptionTarget != nil && gotDisruptionTarget.LastTransitionTime.IsZero() { 414 t.Errorf("Pod %v has DisruptionTarget condition without LastTransitionTime", klog.KObj(pod)) 415 } 416 if pod.Status.Phase != v1.PodFailed { 417 t.Errorf("Unexpected phase for pod %q. Got: %q, want: %q", klog.KObj(pod), pod.Status.Phase, v1.PodFailed) 418 } 419 }) 420 } 421 } 422 423 func setup(t *testing.T, name string) *testutils.TestContext { 424 testCtx := testutils.InitTestAPIServer(t, name, nil) 425 externalInformers := informers.NewSharedInformerFactory(testCtx.ClientSet, time.Second) 426 427 podgc := podgc.NewPodGCInternal(testCtx.Ctx, 428 testCtx.ClientSet, 429 externalInformers.Core().V1().Pods(), 430 externalInformers.Core().V1().Nodes(), 431 0, 432 500*time.Millisecond, 433 time.Second) 434 435 // Waiting for all controllers to sync 436 externalInformers.Start(testCtx.Ctx.Done()) 437 externalInformers.WaitForCacheSync(testCtx.Ctx.Done()) 438 439 go podgc.Run(testCtx.Ctx) 440 return testCtx 441 }