k8s.io/kubernetes@v1.29.3/pkg/controller/podgc/gc_controller_test.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package podgc

import (
	"context"
	"encoding/json"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	coreinformers "k8s.io/client-go/informers/core/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
	clienttesting "k8s.io/client-go/testing"
	"k8s.io/client-go/util/workqueue"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	metricstestutil "k8s.io/component-base/metrics/testutil"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/podgc/metrics"
	"k8s.io/kubernetes/pkg/controller/testutil"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	testingclock "k8s.io/utils/clock/testing"
	"k8s.io/utils/pointer"
)

// alwaysReady short-circuits the informer cache sync check in tests.
func alwaysReady() bool { return true }

// NewFromClient wires a PodGCController to the given (typically fake) clientset and
// returns the controller together with the pod and node informers whose stores the
// tests seed directly.
func NewFromClient(ctx context.Context, kubeClient clientset.Interface, terminatedPodThreshold int) (*PodGCController, coreinformers.PodInformer, coreinformers.NodeInformer) {
	informerFactory := informers.NewSharedInformerFactory(kubeClient, controller.NoResyncPeriodFunc())
	podInformer := informerFactory.Core().V1().Pods()
	nodeInformer := informerFactory.Core().V1().Nodes()
	controller := NewPodGC(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold)
	controller.podListerSynced = alwaysReady
	return controller, podInformer, nodeInformer
}

func TestGCTerminated(t *testing.T) {
	type nameToPhase struct {
		name   string
		phase  v1.PodPhase
		reason string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPhase
		threshold                     int
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "delete pod a which is PodFailed and pod b which is PodSucceeded; PodDisruptionConditions enabled",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:                     1,
			patchedPodNames:               sets.NewString(),
			deletedPodNames:               sets.NewString("a", "b"),
			enablePodDisruptionConditions: true,
		},
		{
			name: "threshold = 0, disables terminated pod deletion",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold: 0,
			// threshold = 0 disables terminated pod deletion
			deletedPodNames: sets.NewString(),
		},
		{
			name: "threshold = 1, delete pod a which is PodFailed and pod b which is PodSucceeded",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			name: "threshold = 1, delete pod b which is PodSucceeded",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodRunning},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("b"),
		},
		{
			name: "threshold = 1, delete pod a which is PodFailed",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name: "threshold = 5, don't delete pod",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
			},
			threshold:       5,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "threshold = 1, delete evicted pod c along with pod a which is PodFailed",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("c", "a"),
		},
		{
			name: "threshold = 1, delete evicted pod c",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodRunning},
				{name: "b", phase: v1.PodSucceeded},
				{name: "c", phase: v1.PodFailed, reason: eviction.Reason},
			},
			threshold:       1,
			deletedPodNames: sets.NewString("c"),
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			creationTime := time.Unix(0, 0)
			nodes := []*v1.Node{testutil.NewNode("node")}

			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime}},
					Status:     v1.PodStatus{Phase: pod.phase, Reason: pod.reason},
					Spec:       v1.PodSpec{NodeName: "node"},
				})
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, test.threshold)
			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}

			gcc.gc(ctx)

			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

	// Across the test cases above, 9 pods are deleted with reason PodGCReasonTerminated.
	testDeletingPodsMetrics(t, 9, metrics.PodGCReasonTerminated)
}

// makePod builds a minimal pod in the default namespace bound to nodeName with the given phase.
func makePod(name string, nodeName string, phase v1.PodPhase) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: metav1.NamespaceDefault,
		},
		Spec:   v1.PodSpec{NodeName: nodeName},
		Status: v1.PodStatus{Phase: phase},
	}
}

// waitForAdded polls until the delaying queue holds exactly depth items.
func waitForAdded(q workqueue.DelayingInterface, depth int) error {
	return wait.Poll(1*time.Millisecond, 10*time.Second, func() (done bool, err error) {
		if q.Len() == depth {
			return true, nil
		}

		return false, nil
	})
}

func TestGCOrphaned(t *testing.T) {
	testCases := []struct {
		name                          string
		initialClientNodes            []*v1.Node
		initialInformerNodes          []*v1.Node
		delay                         time.Duration
		addedClientNodes              []*v1.Node
		deletedClientNodes            []*v1.Node
		addedInformerNodes            []*v1.Node
		deletedInformerNodes          []*v1.Node
		pods                          []*v1.Pod
		itemsInQueue                  int
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "nodes present in lister",
			initialInformerNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name: "nodes present in client",
			initialClientNodes: []*v1.Node{
				testutil.NewNode("existing1"),
				testutil.NewNode("existing2"),
			},
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "existing1", v1.PodRunning),
				makePod("b", "existing2", v1.PodFailed),
				makePod("c", "existing2", v1.PodSucceeded),
			},
			itemsInQueue:    2,
			deletedPodNames: sets.NewString(),
		},
		{
			name:  "no nodes",
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "deleted", v1.PodSucceeded),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "b"),
		},
		{
			name:  "no nodes with PodDisruptionConditions enabled",
			delay: 2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "deleted", v1.PodSucceeded),
				makePod("c", "deleted", v1.PodRunning),
			},
			itemsInQueue:                  1,
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("c"),
			enablePodDisruptionConditions: true,
		},
		{
			name:  "quarantine not finished",
			delay: quarantineTime / 2,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
		{
			name:                 "wrong nodes",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
			patchedPodNames: sets.NewString("a"),
		},
		{
			name:                 "some nodes missing",
			initialInformerNodes: []*v1.Node{testutil.NewNode("existing")},
			delay:                2 * quarantineTime,
			pods: []*v1.Pod{
				makePod("a", "deleted", v1.PodFailed),
				makePod("b", "existing", v1.PodFailed),
				makePod("c", "deleted", v1.PodSucceeded),
				makePod("d", "deleted", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a", "c", "d"),
			patchedPodNames: sets.NewString("d"),
		},
		{
			name:             "node added to client after quarantine",
			delay:            2 * quarantineTime,
			addedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodRunning),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			name:               "node added to informer after quarantine",
			delay:              2 * quarantineTime,
			addedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString(),
		},
		{
			// It shouldn't happen that the client is lagging behind the informer.
			// This test case is more of a sanity check.
			name:               "node deleted from client after quarantine",
			initialClientNodes: []*v1.Node{testutil.NewNode("node")},
			delay:              2 * quarantineTime,
			deletedClientNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodFailed),
			},
			itemsInQueue:    1,
			deletedPodNames: sets.NewString("a"),
		},
		{
			name:                 "node deleted from informer after quarantine",
			initialInformerNodes: []*v1.Node{testutil.NewNode("node")},
			delay:                2 * quarantineTime,
			deletedInformerNodes: []*v1.Node{testutil.NewNode("node")},
			pods: []*v1.Pod{
				makePod("a", "node", v1.PodSucceeded),
			},
			itemsInQueue:    0,
			deletedPodNames: sets.NewString(),
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			nodes := make([]*v1.Node, 0, len(test.initialClientNodes))
			for _, node := range test.initialClientNodes {
				nodes = append(nodes, node)
			}
			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				pods = append(pods, pod)
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)
			for _, node := range test.initialInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, pod := range test.pods {
				podInformer.Informer().GetStore().Add(pod)
			}
			// Overwrite the queue with one driven by a fake clock.
			fakeClock := testingclock.NewFakeClock(time.Now())
			gcc.nodeQueue.ShutDown()
			gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")

			// First GC of orphaned pods
			gcc.gc(ctx)
			deletedPodNames := getDeletedPodNames(client)

			if len(deletedPodNames) > 0 {
				t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
			}

			// Move clock forward
			fakeClock.Step(test.delay)
			// Wait for queue goroutine to process items
			if test.itemsInQueue > 0 {
				err := waitForAdded(gcc.nodeQueue, test.itemsInQueue)
				if err != nil {
					t.Errorf("wrong number of items in the node queue.\n\texpected: %v\n\tactual: %v",
						test.itemsInQueue, gcc.nodeQueue.Len())
				}
			}

			// Execute planned node changes
			for _, node := range test.addedClientNodes {
				client.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
			}
			for _, node := range test.deletedClientNodes {
				client.CoreV1().Nodes().Delete(context.TODO(), node.Name, metav1.DeleteOptions{})
			}
			for _, node := range test.addedInformerNodes {
				nodeInformer.Informer().GetStore().Add(node)
			}
			for _, node := range test.deletedInformerNodes {
				nodeInformer.Informer().GetStore().Delete(node)
			}

			// Actual pod deletion
			gcc.gc(context.TODO())
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

	// Across the test cases above, 10 pods are deleted with reason PodGCReasonOrphaned.
	testDeletingPodsMetrics(t, 10, metrics.PodGCReasonOrphaned)
}

func TestGCUnscheduledTerminating(t *testing.T) {
	type nameToPhase struct {
		name              string
		phase             v1.PodPhase
		deletionTimeStamp *metav1.Time
		nodeName          string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPhase
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "Unscheduled pod in any phase must be deleted, the phase of the running pod is changed to Failed; PodDisruptionConditions enabled",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
			},
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("c"),
			enablePodDisruptionConditions: true,
		},
		{
			name: "Unscheduled pod in any phase must be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
			},
			deletedPodNames: sets.NewString("a", "b", "c"),
			patchedPodNames: sets.NewString("c"),
		},
		{
			name: "Scheduled pod in any phase must not be deleted",
			pods: []nameToPhase{
				{name: "a", phase: v1.PodFailed, deletionTimeStamp: nil, nodeName: ""},
				{name: "b", phase: v1.PodSucceeded, deletionTimeStamp: nil, nodeName: "node"},
				{name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "node"},
			},
			deletedPodNames: sets.NewString(),
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()
			creationTime := time.Unix(0, 0)

			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
						DeletionTimestamp: pod.deletionTimeStamp},
					Status: v1.PodStatus{Phase: pod.phase},
					Spec:   v1.PodSpec{NodeName: pod.nodeName},
				})
			}
			nodes := []*v1.Node{}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, -1)

			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}

			pods, err := podInformer.Lister().List(labels.Everything())
			if err != nil {
				t.Errorf("Error while listing all Pods: %v", err)
				return
			}
			gcc.gcUnscheduledTerminating(ctx, pods)
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}

	// Across the test cases above, 6 pods are deleted with reason PodGCReasonTerminatingUnscheduled.
	testDeletingPodsMetrics(t, 6, metrics.PodGCReasonTerminatingUnscheduled)
}

func TestGCTerminating(t *testing.T) {
	type node struct {
		name           string
		readyCondition v1.ConditionStatus
		taints         []v1.Taint
	}

	type nameToPodConfig struct {
		name              string
		phase             v1.PodPhase
		deletionTimeStamp *metav1.Time
		nodeName          string
	}

	testCases := []struct {
		name                          string
		pods                          []nameToPodConfig
		nodes                         []node
		deletedPodNames               sets.String
		patchedPodNames               sets.String
		enablePodDisruptionConditions bool
	}{
		{
			name: "pods have deletion timestamp set and the corresponding nodes are not ready",
			nodes: []node{
				{name: "worker-0", readyCondition: v1.ConditionFalse},
				{name: "worker-1", readyCondition: v1.ConditionFalse},
			},
			pods: []nameToPodConfig{
				{name: "a", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
				{name: "b", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
			},
			deletedPodNames: sets.NewString(),
		},

		{
			name: "some pods have deletion timestamp and/or phase set and some of the corresponding nodes have an " +
				"out-of-service taint and are not ready",
			nodes: []node{
				// terminated pods on this node should be force deleted
				{name: "worker-0", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoExecute}}},
				// terminated pods on this node should not be force deleted
				{name: "worker-1", readyCondition: v1.ConditionFalse},
				// terminated pods on this node should not be force deleted
				{name: "worker-2", readyCondition: v1.ConditionTrue},
				// terminated pods on this node should be force deleted
				{name: "worker-3", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoSchedule}}},
				// terminated pods on this node should be force deleted
				{name: "worker-4", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectPreferNoSchedule}}},
				// terminated pods on this node should be force deleted
				{name: "worker-5", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Value: "any-value", Effect: v1.TaintEffectNoExecute}}},
			},
			pods: []nameToPodConfig{
				// pods a1, b1, c1, d1 and e1 are on node worker-0
				{name: "a1", nodeName: "worker-0"},
				{name: "b1", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-0"},
				{name: "c1", phase: v1.PodPending, nodeName: "worker-0"},
				{name: "d1", phase: v1.PodRunning, nodeName: "worker-0"},
				{name: "e1", phase: v1.PodUnknown, nodeName: "worker-0"},

				// pods a2, b2, c2, d2 and e2 are on node worker-1
				{name: "a2", nodeName: "worker-1"},
				{name: "b2", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-1"},
				{name: "c2", phase: v1.PodPending, nodeName: "worker-1"},
				{name: "d2", phase: v1.PodRunning, nodeName: "worker-1"},
				{name: "e2", phase: v1.PodUnknown, nodeName: "worker-1"},

				// pods a3, b3, c3, d3 and e3 are on node worker-2
				{name: "a3", nodeName: "worker-2"},
				{name: "b3", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-2"},
				{name: "c3", phase: v1.PodPending, nodeName: "worker-2"},
				{name: "d3", phase: v1.PodRunning, nodeName: "worker-2"},
				{name: "e3", phase: v1.PodUnknown, nodeName: "worker-2"},

				// pods a4, b4, c4, d4 and e4 are on node worker-3
				{name: "a4", nodeName: "worker-3"},
				{name: "b4", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-3"},
				{name: "c4", phase: v1.PodPending, nodeName: "worker-3"},
				{name: "d4", phase: v1.PodRunning, nodeName: "worker-3"},
				{name: "e4", phase: v1.PodUnknown, nodeName: "worker-3"},

				// pods a5, b5, c5, d5 and e5 are on node worker-4
				{name: "a5", nodeName: "worker-3"},
				{name: "b5", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-4"},
				{name: "c5", phase: v1.PodPending, nodeName: "worker-4"},
				{name: "d5", phase: v1.PodRunning, nodeName: "worker-4"},
				{name: "e5", phase: v1.PodUnknown, nodeName: "worker-4"},

				// pods a6, b6, c6, d6 and e6 are on node worker-5
				{name: "a6", nodeName: "worker-5"},
				{name: "b6", deletionTimeStamp: &metav1.Time{}, nodeName: "worker-5"},
				{name: "c6", phase: v1.PodPending, nodeName: "worker-5"},
				{name: "d6", phase: v1.PodRunning, nodeName: "worker-5"},
				{name: "e6", phase: v1.PodUnknown, nodeName: "worker-5"},
			},
			deletedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
			patchedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
		},
		{
			name: "pods deleted from node tainted out-of-service; PodDisruptionConditions enabled",
			nodes: []node{
				{name: "worker", readyCondition: v1.ConditionFalse, taints: []v1.Taint{{Key: v1.TaintNodeOutOfService,
					Effect: v1.TaintEffectNoExecute}}},
			},
			pods: []nameToPodConfig{
				{name: "a", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
				{name: "b", phase: v1.PodFailed, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
				{name: "c", phase: v1.PodSucceeded, deletionTimeStamp: &metav1.Time{}, nodeName: "worker"},
			},
			deletedPodNames:               sets.NewString("a", "b", "c"),
			patchedPodNames:               sets.NewString("a"),
			enablePodDisruptionConditions: true,
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)()

			creationTime := time.Unix(0, 0)
			nodes := make([]*v1.Node, 0, len(test.nodes))
			for _, node := range test.nodes {
				creationTime = creationTime.Add(2 * time.Hour)
				nodes = append(nodes, &v1.Node{
					ObjectMeta: metav1.ObjectMeta{Name: node.name, CreationTimestamp: metav1.Time{Time: creationTime}},
					Spec: v1.NodeSpec{
						Taints: node.taints,
					},
					Status: v1.NodeStatus{
						Conditions: []v1.NodeCondition{
							{
								Type:   v1.NodeReady,
								Status: node.readyCondition,
							},
						},
					},
				})
			}
			pods := make([]*v1.Pod, 0, len(test.pods))
			for _, pod := range test.pods {
				creationTime = creationTime.Add(1 * time.Hour)
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.name, Namespace: metav1.NamespaceDefault, CreationTimestamp: metav1.Time{Time: creationTime},
						DeletionTimestamp: pod.deletionTimeStamp},
					Status: v1.PodStatus{Phase: pod.phase},
					Spec:   v1.PodSpec{NodeName: pod.nodeName},
				})
			}
			client := setupNewSimpleClient(nodes, pods)
			gcc, podInformer, nodeInformer := NewFromClient(ctx, client, -1)

			for _, pod := range pods {
				podInformer.Informer().GetStore().Add(pod)
			}
			for _, node := range nodes {
				nodeInformer.Informer().GetStore().Add(node)
			}

			gcc.gc(ctx)
			verifyDeletedAndPatchedPods(t, client, test.deletedPodNames, test.patchedPodNames)
		})
	}
	// Across the test cases above, 7 pods are deleted with reason PodGCReasonTerminatingOutOfService.
	testDeletingPodsMetrics(t, 7, metrics.PodGCReasonTerminatingOutOfService)
}

func TestGCInspectingPatchedPodBeforeDeletion(t *testing.T) {
	testCases := []struct {
		name                 string
		pod                  *v1.Pod
		expectedPatchedPod   *v1.Pod
		expectedDeleteAction *clienttesting.DeleteActionImpl
	}{
		{
			name: "orphaned pod should have DisruptionTarget condition added before deletion",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Namespace: "default",
					Name:      "testPod",
				},
				Spec: v1.PodSpec{
					NodeName: "deletedNode",
				},
				Status: v1.PodStatus{
					Phase: v1.PodRunning,
					Conditions: []v1.PodCondition{
						{
							Type:   v1.PodReady,
							Status: v1.ConditionTrue,
						},
					},
				},
			},
			expectedPatchedPod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Namespace: "default",
					Name:      "testPod",
				},
				Spec: v1.PodSpec{
					NodeName: "deletedNode",
				},
				Status: v1.PodStatus{
					Phase: v1.PodFailed,
					Conditions: []v1.PodCondition{
						{
							Type:   v1.PodReady,
							Status: v1.ConditionTrue,
						},
						{
							Type:    v1.DisruptionTarget,
							Status:  v1.ConditionTrue,
							Reason:  "DeletionByPodGC",
							Message: "PodGC: node no longer exists",
						},
					},
				},
			},
			expectedDeleteAction: &clienttesting.DeleteActionImpl{
				Name:          "testPod",
				DeleteOptions: metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64(0)},
			},
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			_, ctx := ktesting.NewTestContext(t)
			defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, true)()

			pods := []*v1.Pod{test.pod}

			client := setupNewSimpleClient(nil, pods)
			gcc, podInformer, _ := NewFromClient(ctx, client, -1)
			gcc.quarantineTime = time.Duration(-1)
			podInformer.Informer().GetStore().Add(test.pod)
			gcc.gc(ctx)

			actions := client.Actions()

			var patchAction clienttesting.PatchAction
			var deleteAction clienttesting.DeleteAction

			for _, action := range actions {
				if action.GetVerb() == "patch" {
					patchAction = action.(clienttesting.PatchAction)
				}

				if action.GetVerb() == "delete" {
					deleteAction = action.(clienttesting.DeleteAction)
				}
			}

			if patchAction != nil && test.expectedPatchedPod == nil {
				t.Fatalf("Pod was patched but expectedPatchedPod is nil")
			}
			if test.expectedPatchedPod != nil {
				patchedPodBytes := patchAction.GetPatch()
				originalPod, err := json.Marshal(test.pod)
				if err != nil {
					t.Fatalf("Failed to marshal original pod %#v: %v", originalPod, err)
				}
				updated, err := strategicpatch.StrategicMergePatch(originalPod, patchedPodBytes, v1.Pod{})
				if err != nil {
					t.Fatalf("Failed to apply strategic merge patch %q on pod %#v: %v", patchedPodBytes, originalPod, err)
				}

				updatedPod := &v1.Pod{}
				if err := json.Unmarshal(updated, updatedPod); err != nil {
					t.Fatalf("Failed to unmarshal updated pod %q: %v", updated, err)
				}

				if diff := cmp.Diff(test.expectedPatchedPod, updatedPod, cmpopts.IgnoreFields(v1.Pod{}, "TypeMeta"), cmpopts.IgnoreFields(v1.PodCondition{}, "LastTransitionTime")); diff != "" {
					t.Fatalf("Unexpected diff on pod (-want,+got):\n%s", diff)
				}
			}

			if deleteAction != nil && test.expectedDeleteAction == nil {
				t.Fatalf("Pod was deleted but expectedDeleteAction is nil")
			}
			if test.expectedDeleteAction != nil {
				if diff := cmp.Diff(*test.expectedDeleteAction, deleteAction, cmpopts.IgnoreFields(clienttesting.DeleteActionImpl{}, "ActionImpl")); diff != "" {
					t.Fatalf("Unexpected diff on deleteAction (-want,+got):\n%s", diff)
				}
			}
		})
	}
}

// verifyDeletedAndPatchedPods compares the pod names deleted and patched through the
// fake clientset with the expected sets.
func verifyDeletedAndPatchedPods(t *testing.T, client *fake.Clientset, wantDeletedPodNames, wantPatchedPodNames sets.String) {
	t.Helper()
	deletedPodNames := getDeletedPodNames(client)
	if diff := cmp.Diff(wantDeletedPodNames, deletedPodNames); diff != "" {
		t.Errorf("Deleted pod names (-want,+got):\n%s", diff)
	}
	patchedPodNames := getPatchedPodNames(client)
	if diff := cmp.Diff(wantPatchedPodNames, patchedPodNames); diff != "" {
		t.Errorf("Patched pod names (-want,+got):\n%s", diff)
	}
}

// testDeletingPodsMetrics asserts the cumulative values of the DeletingPodsTotal and
// DeletingPodsErrorTotal counters for the given reason. The metrics are global, so the
// expected total accumulates across the test cases of the calling test.
func testDeletingPodsMetrics(t *testing.T, total int, reason string) {
	t.Helper()

	actualDeletingPodsTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsTotal.WithLabelValues(metav1.NamespaceDefault, reason))
	if err != nil {
		t.Errorf("Error getting DeletingPodsTotal: %v", err)
	}
	if actualDeletingPodsTotal != float64(total) {
		t.Errorf("Expected DeletingPodsTotal to be %d, got %v", total, actualDeletingPodsTotal)
	}

	actualDeletingPodsErrorTotal, err := metricstestutil.GetCounterMetricValue(metrics.DeletingPodsErrorTotal.WithLabelValues("", reason))
	if err != nil {
		t.Errorf("Error getting DeletingPodsErrorTotal: %v", err)
	}
	if actualDeletingPodsErrorTotal != float64(0) {
		t.Errorf("Expected DeletingPodsErrorTotal to be %d, got %v", 0, actualDeletingPodsErrorTotal)
	}
}

// setupNewSimpleClient builds a fake clientset pre-populated with the given nodes and pods.
func setupNewSimpleClient(nodes []*v1.Node, pods []*v1.Pod) *fake.Clientset {
	podList := &v1.PodList{}
	for _, podItem := range pods {
		podList.Items = append(podList.Items, *podItem)
	}
	nodeList := &v1.NodeList{}
	for _, nodeItem := range nodes {
		nodeList.Items = append(nodeList.Items, *nodeItem)
	}
	return fake.NewSimpleClientset(nodeList, podList)
}

// getDeletedPodNames collects the names of pods for which the fake clientset recorded a delete action.
func getDeletedPodNames(client *fake.Clientset) sets.String {
	deletedPodNames := sets.NewString()
	for _, action := range client.Actions() {
		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
			deleteAction := action.(clienttesting.DeleteAction)
			deletedPodNames.Insert(deleteAction.GetName())
		}
	}
	return deletedPodNames
}

// getPatchedPodNames collects the names of pods for which the fake clientset recorded a patch action.
func getPatchedPodNames(client *fake.Clientset) sets.String {
	patchedPodNames := sets.NewString()
	for _, action := range client.Actions() {
		if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" {
			patchAction := action.(clienttesting.PatchAction)
			patchedPodNames.Insert(patchAction.GetName())
		}
	}
	return patchedPodNames
}