k8s.io/kubernetes@v1.29.3/pkg/controller/tainteviction/taint_eviction_test.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tainteviction 18 19 import ( 20 "context" 21 "fmt" 22 "sort" 23 "testing" 24 "time" 25 26 "github.com/google/go-cmp/cmp" 27 28 corev1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/fields" 31 "k8s.io/apimachinery/pkg/labels" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/apimachinery/pkg/util/wait" 34 "k8s.io/apiserver/pkg/util/feature" 35 "k8s.io/client-go/informers" 36 "k8s.io/client-go/kubernetes/fake" 37 clienttesting "k8s.io/client-go/testing" 38 "k8s.io/client-go/tools/cache" 39 featuregatetesting "k8s.io/component-base/featuregate/testing" 40 "k8s.io/kubernetes/pkg/controller/testutil" 41 "k8s.io/kubernetes/pkg/features" 42 ) 43 44 var timeForControllerToProgressForSanityCheck = 20 * time.Millisecond 45 46 func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNodeNameFunc { 47 return func(nodeName string) ([]*corev1.Pod, error) { 48 selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}) 49 pods, err := c.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{ 50 FieldSelector: selector.String(), 51 LabelSelector: labels.Everything().String(), 52 }) 53 if err != nil { 54 return []*corev1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName) 55 } 56 rPods := make([]*corev1.Pod, len(pods.Items)) 57 for i := range pods.Items { 58 rPods[i] = &pods.Items[i] 59 } 60 return rPods, nil 61 } 62 } 63 64 func createNoExecuteTaint(index int) corev1.Taint { 65 now := metav1.Now() 66 return corev1.Taint{ 67 Key: "testTaint" + fmt.Sprintf("%v", index), 68 Value: "test" + fmt.Sprintf("%v", index), 69 Effect: corev1.TaintEffectNoExecute, 70 TimeAdded: &now, 71 } 72 } 73 74 func addToleration(pod *corev1.Pod, index int, duration int64) *corev1.Pod { 75 if pod.Annotations == nil { 76 pod.Annotations = map[string]string{} 77 } 78 if duration < 0 { 79 pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute}} 80 81 } else { 82 pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &duration}} 83 } 84 return pod 85 } 86 87 func addTaintsToNode(node *corev1.Node, key, value string, indices []int) *corev1.Node { 88 taints := []corev1.Taint{} 89 for _, index := range indices { 90 taints = append(taints, createNoExecuteTaint(index)) 91 } 92 node.Spec.Taints = taints 93 return node 94 } 95 96 var alwaysReady = func() bool { return true } 97 98 func setupNewController(ctx context.Context, fakeClientSet *fake.Clientset) (*Controller, cache.Indexer, cache.Indexer) { 99 informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) 100 podIndexer := informerFactory.Core().V1().Pods().Informer().GetIndexer() 101 nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer() 102 mgr, _ := New(ctx, fakeClientSet, informerFactory.Core().V1().Pods(), informerFactory.Core().V1().Nodes(), "taint-eviction-controller") 103 mgr.podListerSynced = alwaysReady 104 mgr.nodeListerSynced = alwaysReady 105 mgr.getPodsAssignedToNode = getPodsAssignedToNode(ctx, fakeClientSet) 106 return mgr, podIndexer, nodeIndexer 107 } 108 109 type timestampedPod struct { 110 names []string 111 timestamp time.Duration 112 } 113 114 type durationSlice []timestampedPod 115 116 func (a durationSlice) Len() int { return len(a) } 117 func (a durationSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 118 func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp } 119 120 func TestFilterNoExecuteTaints(t *testing.T) { 121 taints := []corev1.Taint{ 122 { 123 Key: "one", 124 Value: "one", 125 Effect: corev1.TaintEffectNoExecute, 126 }, 127 { 128 Key: "two", 129 Value: "two", 130 Effect: corev1.TaintEffectNoSchedule, 131 }, 132 } 133 taints = getNoExecuteTaints(taints) 134 if len(taints) != 1 || taints[0].Key != "one" { 135 t.Errorf("Filtering doesn't work. Got %v", taints) 136 } 137 } 138 139 func TestCreatePod(t *testing.T) { 140 testCases := []struct { 141 description string 142 pod *corev1.Pod 143 taintedNodes map[string][]corev1.Taint 144 expectPatch bool 145 expectDelete bool 146 enablePodDisruptionConditions bool 147 }{ 148 { 149 description: "not scheduled - ignore", 150 pod: testutil.NewPod("pod1", ""), 151 taintedNodes: map[string][]corev1.Taint{}, 152 expectDelete: false, 153 }, 154 { 155 description: "scheduled on untainted Node", 156 pod: testutil.NewPod("pod1", "node1"), 157 taintedNodes: map[string][]corev1.Taint{}, 158 expectDelete: false, 159 }, 160 { 161 description: "schedule on tainted Node", 162 pod: testutil.NewPod("pod1", "node1"), 163 taintedNodes: map[string][]corev1.Taint{ 164 "node1": {createNoExecuteTaint(1)}, 165 }, 166 expectDelete: true, 167 }, 168 { 169 description: "schedule on tainted Node; PodDisruptionConditions enabled", 170 pod: testutil.NewPod("pod1", "node1"), 171 taintedNodes: map[string][]corev1.Taint{ 172 "node1": {createNoExecuteTaint(1)}, 173 }, 174 expectPatch: true, 175 expectDelete: true, 176 enablePodDisruptionConditions: true, 177 }, 178 { 179 description: "schedule on tainted Node with finite toleration", 180 pod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 181 taintedNodes: map[string][]corev1.Taint{ 182 "node1": {createNoExecuteTaint(1)}, 183 }, 184 expectDelete: false, 185 }, 186 { 187 description: "schedule on tainted Node with infinite toleration", 188 pod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 189 taintedNodes: map[string][]corev1.Taint{ 190 "node1": {createNoExecuteTaint(1)}, 191 }, 192 expectDelete: false, 193 }, 194 { 195 description: "schedule on tainted Node with infinite ivalid toleration", 196 pod: addToleration(testutil.NewPod("pod1", "node1"), 2, -1), 197 taintedNodes: map[string][]corev1.Taint{ 198 "node1": {createNoExecuteTaint(1)}, 199 }, 200 expectDelete: true, 201 }, 202 } 203 204 for _, item := range testCases { 205 t.Run(item.description, func(t *testing.T) { 206 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)() 207 ctx, cancel := context.WithCancel(context.Background()) 208 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.pod}}) 209 controller, podIndexer, _ := setupNewController(ctx, fakeClientset) 210 controller.recorder = testutil.NewFakeRecorder() 211 go controller.Run(ctx) 212 controller.taintedNodes = item.taintedNodes 213 214 podIndexer.Add(item.pod) 215 controller.PodUpdated(nil, item.pod) 216 217 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 218 219 cancel() 220 }) 221 } 222 } 223 224 func TestDeletePod(t *testing.T) { 225 ctx, cancel := context.WithCancel(context.Background()) 226 defer cancel() 227 228 fakeClientset := fake.NewSimpleClientset() 229 controller, _, _ := setupNewController(ctx, fakeClientset) 230 controller.recorder = testutil.NewFakeRecorder() 231 go controller.Run(ctx) 232 controller.taintedNodes = map[string][]corev1.Taint{ 233 "node1": {createNoExecuteTaint(1)}, 234 } 235 controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil) 236 // wait a bit to see if nothing will panic 237 time.Sleep(timeForControllerToProgressForSanityCheck) 238 } 239 240 func TestUpdatePod(t *testing.T) { 241 testCases := []struct { 242 description string 243 prevPod *corev1.Pod 244 awaitForScheduledEviction bool 245 newPod *corev1.Pod 246 taintedNodes map[string][]corev1.Taint 247 expectPatch bool 248 expectDelete bool 249 enablePodDisruptionConditions bool 250 }{ 251 { 252 description: "scheduling onto tainted Node results in patch and delete when PodDisruptionConditions enabled", 253 prevPod: testutil.NewPod("pod1", ""), 254 newPod: testutil.NewPod("pod1", "node1"), 255 taintedNodes: map[string][]corev1.Taint{ 256 "node1": {createNoExecuteTaint(1)}, 257 }, 258 expectPatch: true, 259 expectDelete: true, 260 enablePodDisruptionConditions: true, 261 }, 262 { 263 description: "scheduling onto tainted Node", 264 prevPod: testutil.NewPod("pod1", ""), 265 newPod: testutil.NewPod("pod1", "node1"), 266 taintedNodes: map[string][]corev1.Taint{ 267 "node1": {createNoExecuteTaint(1)}, 268 }, 269 expectDelete: true, 270 }, 271 { 272 description: "scheduling onto tainted Node with toleration", 273 prevPod: addToleration(testutil.NewPod("pod1", ""), 1, -1), 274 newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 275 taintedNodes: map[string][]corev1.Taint{ 276 "node1": {createNoExecuteTaint(1)}, 277 }, 278 expectDelete: false, 279 }, 280 { 281 description: "removing toleration", 282 prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 283 newPod: testutil.NewPod("pod1", "node1"), 284 awaitForScheduledEviction: true, 285 taintedNodes: map[string][]corev1.Taint{ 286 "node1": {createNoExecuteTaint(1)}, 287 }, 288 expectDelete: true, 289 }, 290 { 291 description: "lengthening toleration shouldn't work", 292 prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 1), 293 newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 294 awaitForScheduledEviction: true, 295 taintedNodes: map[string][]corev1.Taint{ 296 "node1": {createNoExecuteTaint(1)}, 297 }, 298 expectDelete: true, 299 }, 300 } 301 302 for _, item := range testCases { 303 t.Run(item.description, func(t *testing.T) { 304 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)() 305 ctx, cancel := context.WithCancel(context.Background()) 306 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.prevPod}}) 307 controller, podIndexer, _ := setupNewController(context.TODO(), fakeClientset) 308 controller.recorder = testutil.NewFakeRecorder() 309 controller.taintedNodes = item.taintedNodes 310 go controller.Run(ctx) 311 312 podIndexer.Add(item.prevPod) 313 controller.PodUpdated(nil, item.prevPod) 314 315 if item.awaitForScheduledEviction { 316 nsName := types.NamespacedName{Namespace: item.prevPod.Namespace, Name: item.prevPod.Name} 317 err := wait.PollImmediate(time.Millisecond*10, time.Second, func() (bool, error) { 318 scheduledEviction := controller.taintEvictionQueue.GetWorkerUnsafe(nsName.String()) 319 return scheduledEviction != nil, nil 320 }) 321 if err != nil { 322 t.Fatalf("Failed to await for scheduled eviction: %q", err) 323 } 324 } 325 326 podIndexer.Update(item.newPod) 327 controller.PodUpdated(item.prevPod, item.newPod) 328 329 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 330 cancel() 331 }) 332 } 333 } 334 335 func TestCreateNode(t *testing.T) { 336 testCases := []struct { 337 description string 338 pods []corev1.Pod 339 node *corev1.Node 340 expectPatch bool 341 expectDelete bool 342 }{ 343 { 344 description: "Creating Node matching already assigned Pod", 345 pods: []corev1.Pod{ 346 *testutil.NewPod("pod1", "node1"), 347 }, 348 node: testutil.NewNode("node1"), 349 expectPatch: false, 350 expectDelete: false, 351 }, 352 { 353 description: "Creating tainted Node matching already assigned Pod", 354 pods: []corev1.Pod{ 355 *testutil.NewPod("pod1", "node1"), 356 }, 357 node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 358 expectPatch: true, 359 expectDelete: true, 360 }, 361 { 362 description: "Creating tainted Node matching already assigned tolerating Pod", 363 pods: []corev1.Pod{ 364 *addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 365 }, 366 node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 367 expectPatch: false, 368 expectDelete: false, 369 }, 370 } 371 372 for _, item := range testCases { 373 ctx, cancel := context.WithCancel(context.Background()) 374 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 375 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 376 nodeIndexer.Add(item.node) 377 controller.recorder = testutil.NewFakeRecorder() 378 go controller.Run(ctx) 379 controller.NodeUpdated(nil, item.node) 380 381 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 382 383 cancel() 384 } 385 } 386 387 func TestDeleteNode(t *testing.T) { 388 ctx, cancel := context.WithCancel(context.Background()) 389 fakeClientset := fake.NewSimpleClientset() 390 controller, _, _ := setupNewController(ctx, fakeClientset) 391 controller.recorder = testutil.NewFakeRecorder() 392 controller.taintedNodes = map[string][]corev1.Taint{ 393 "node1": {createNoExecuteTaint(1)}, 394 } 395 go controller.Run(ctx) 396 controller.NodeUpdated(testutil.NewNode("node1"), nil) 397 398 // await until controller.taintedNodes is empty 399 err := wait.PollImmediate(10*time.Millisecond, time.Second, func() (bool, error) { 400 controller.taintedNodesLock.Lock() 401 defer controller.taintedNodesLock.Unlock() 402 _, ok := controller.taintedNodes["node1"] 403 return !ok, nil 404 }) 405 if err != nil { 406 t.Errorf("Failed to await for processing node deleted: %q", err) 407 } 408 cancel() 409 } 410 411 func TestUpdateNode(t *testing.T) { 412 testCases := []struct { 413 description string 414 pods []corev1.Pod 415 oldNode *corev1.Node 416 newNode *corev1.Node 417 expectPatch bool 418 expectDelete bool 419 additionalSleep time.Duration 420 enablePodDisruptionConditions bool 421 }{ 422 { 423 description: "Added taint, expect node patched and deleted when PodDisruptionConditions is enabled", 424 pods: []corev1.Pod{ 425 *testutil.NewPod("pod1", "node1"), 426 }, 427 oldNode: testutil.NewNode("node1"), 428 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 429 expectPatch: true, 430 expectDelete: true, 431 enablePodDisruptionConditions: true, 432 }, 433 { 434 description: "Added taint", 435 pods: []corev1.Pod{ 436 *testutil.NewPod("pod1", "node1"), 437 }, 438 oldNode: testutil.NewNode("node1"), 439 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 440 expectDelete: true, 441 }, 442 { 443 description: "Added tolerated taint", 444 pods: []corev1.Pod{ 445 *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 446 }, 447 oldNode: testutil.NewNode("node1"), 448 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 449 expectDelete: false, 450 }, 451 { 452 description: "Only one added taint tolerated", 453 pods: []corev1.Pod{ 454 *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 455 }, 456 oldNode: testutil.NewNode("node1"), 457 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 458 expectDelete: true, 459 }, 460 { 461 description: "Taint removed", 462 pods: []corev1.Pod{ 463 *addToleration(testutil.NewPod("pod1", "node1"), 1, 1), 464 }, 465 oldNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 466 newNode: testutil.NewNode("node1"), 467 expectDelete: false, 468 additionalSleep: 1500 * time.Millisecond, 469 }, 470 { 471 description: "Pod with multiple tolerations are evicted when first one runs out", 472 pods: []corev1.Pod{ 473 { 474 ObjectMeta: metav1.ObjectMeta{ 475 Namespace: "default", 476 Name: "pod1", 477 }, 478 Spec: corev1.PodSpec{ 479 NodeName: "node1", 480 Tolerations: []corev1.Toleration{ 481 {Key: "testTaint1", Value: "test1", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]}, 482 {Key: "testTaint2", Value: "test2", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]}, 483 }, 484 }, 485 Status: corev1.PodStatus{ 486 Conditions: []corev1.PodCondition{ 487 { 488 Type: corev1.PodReady, 489 Status: corev1.ConditionTrue, 490 }, 491 }, 492 }, 493 }, 494 }, 495 oldNode: testutil.NewNode("node1"), 496 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 497 expectDelete: true, 498 }, 499 } 500 501 for _, item := range testCases { 502 t.Run(item.description, func(t *testing.T) { 503 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)() 504 ctx, cancel := context.WithCancel(context.Background()) 505 defer cancel() 506 507 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 508 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 509 nodeIndexer.Add(item.newNode) 510 controller.recorder = testutil.NewFakeRecorder() 511 go controller.Run(ctx) 512 controller.NodeUpdated(item.oldNode, item.newNode) 513 514 if item.additionalSleep > 0 { 515 time.Sleep(item.additionalSleep) 516 } 517 518 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 519 }) 520 } 521 } 522 523 func TestUpdateNodeWithMultipleTaints(t *testing.T) { 524 taint1 := createNoExecuteTaint(1) 525 taint2 := createNoExecuteTaint(2) 526 527 minute := int64(60) 528 pod := testutil.NewPod("pod1", "node1") 529 pod.Spec.Tolerations = []corev1.Toleration{ 530 {Key: taint1.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute}, 531 {Key: taint2.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &minute}, 532 } 533 podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} 534 535 untaintedNode := testutil.NewNode("node1") 536 537 doubleTaintedNode := testutil.NewNode("node1") 538 doubleTaintedNode.Spec.Taints = []corev1.Taint{taint1, taint2} 539 540 singleTaintedNode := testutil.NewNode("node1") 541 singleTaintedNode.Spec.Taints = []corev1.Taint{taint1} 542 543 ctx, cancel := context.WithCancel(context.TODO()) 544 fakeClientset := fake.NewSimpleClientset(pod) 545 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 546 controller.recorder = testutil.NewFakeRecorder() 547 go controller.Run(ctx) 548 549 // no taint 550 nodeIndexer.Add(untaintedNode) 551 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 552 // verify pod is not queued for deletion 553 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 554 t.Fatalf("pod queued for deletion with no taints") 555 } 556 557 // no taint -> infinitely tolerated taint 558 nodeIndexer.Update(singleTaintedNode) 559 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 560 // verify pod is not queued for deletion 561 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 562 t.Fatalf("pod queued for deletion with permanently tolerated taint") 563 } 564 565 // infinitely tolerated taint -> temporarily tolerated taint 566 nodeIndexer.Update(doubleTaintedNode) 567 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 568 // verify pod is queued for deletion 569 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil { 570 t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint") 571 } 572 573 // temporarily tolerated taint -> infinitely tolerated taint 574 nodeIndexer.Update(singleTaintedNode) 575 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 576 // verify pod is not queued for deletion 577 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 578 t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint") 579 } 580 581 // verify pod is not deleted 582 for _, action := range fakeClientset.Actions() { 583 if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" { 584 t.Error("Unexpected deletion") 585 } 586 } 587 cancel() 588 } 589 590 func TestUpdateNodeWithMultiplePods(t *testing.T) { 591 testCases := []struct { 592 description string 593 pods []corev1.Pod 594 oldNode *corev1.Node 595 newNode *corev1.Node 596 expectedDeleteTimes durationSlice 597 }{ 598 { 599 description: "Pods with different toleration times are evicted appropriately", 600 pods: []corev1.Pod{ 601 *testutil.NewPod("pod1", "node1"), 602 *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), 603 *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), 604 }, 605 oldNode: testutil.NewNode("node1"), 606 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 607 expectedDeleteTimes: durationSlice{ 608 {[]string{"pod1"}, 0}, 609 {[]string{"pod2"}, time.Second}, 610 }, 611 }, 612 { 613 description: "Evict all pods not matching all taints instantly", 614 pods: []corev1.Pod{ 615 *testutil.NewPod("pod1", "node1"), 616 *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), 617 *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), 618 }, 619 oldNode: testutil.NewNode("node1"), 620 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 621 expectedDeleteTimes: durationSlice{ 622 {[]string{"pod1", "pod2", "pod3"}, 0}, 623 }, 624 }, 625 } 626 627 for _, item := range testCases { 628 t.Run(item.description, func(t *testing.T) { 629 t.Logf("Starting testcase %q", item.description) 630 ctx, cancel := context.WithCancel(context.Background()) 631 defer cancel() 632 633 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 634 sort.Sort(item.expectedDeleteTimes) 635 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 636 nodeIndexer.Add(item.newNode) 637 controller.recorder = testutil.NewFakeRecorder() 638 go controller.Run(ctx) 639 controller.NodeUpdated(item.oldNode, item.newNode) 640 641 startedAt := time.Now() 642 for i := range item.expectedDeleteTimes { 643 if i == 0 || item.expectedDeleteTimes[i-1].timestamp != item.expectedDeleteTimes[i].timestamp { 644 // compute a grace duration to give controller time to process updates. Choose big 645 // enough intervals in the test cases above to avoid flakes. 646 var increment time.Duration 647 if i == len(item.expectedDeleteTimes)-1 || item.expectedDeleteTimes[i+1].timestamp == item.expectedDeleteTimes[i].timestamp { 648 increment = 500 * time.Millisecond 649 } else { 650 increment = ((item.expectedDeleteTimes[i+1].timestamp - item.expectedDeleteTimes[i].timestamp) / time.Duration(2)) 651 } 652 653 sleepTime := item.expectedDeleteTimes[i].timestamp - time.Since(startedAt) + increment 654 if sleepTime < 0 { 655 sleepTime = 0 656 } 657 t.Logf("Sleeping for %v", sleepTime) 658 time.Sleep(sleepTime) 659 } 660 661 for delay, podName := range item.expectedDeleteTimes[i].names { 662 deleted := false 663 for _, action := range fakeClientset.Actions() { 664 deleteAction, ok := action.(clienttesting.DeleteActionImpl) 665 if !ok { 666 t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb()) 667 continue 668 } 669 if deleteAction.GetResource().Resource != "pods" { 670 continue 671 } 672 if podName == deleteAction.GetName() { 673 deleted = true 674 } 675 } 676 if !deleted { 677 t.Errorf("Failed to deleted pod %v after %v", podName, delay) 678 } 679 } 680 for _, action := range fakeClientset.Actions() { 681 deleteAction, ok := action.(clienttesting.DeleteActionImpl) 682 if !ok { 683 t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb()) 684 continue 685 } 686 if deleteAction.GetResource().Resource != "pods" { 687 continue 688 } 689 deletedPodName := deleteAction.GetName() 690 expected := false 691 for _, podName := range item.expectedDeleteTimes[i].names { 692 if podName == deletedPodName { 693 expected = true 694 } 695 } 696 if !expected { 697 t.Errorf("Pod %v was deleted even though it shouldn't have", deletedPodName) 698 } 699 } 700 fakeClientset.ClearActions() 701 } 702 }) 703 } 704 } 705 706 func TestGetMinTolerationTime(t *testing.T) { 707 one := int64(1) 708 two := int64(2) 709 oneSec := 1 * time.Second 710 711 tests := []struct { 712 tolerations []corev1.Toleration 713 expected time.Duration 714 }{ 715 { 716 tolerations: []corev1.Toleration{}, 717 expected: 0, 718 }, 719 { 720 tolerations: []corev1.Toleration{ 721 { 722 TolerationSeconds: nil, 723 }, 724 }, 725 expected: -1, 726 }, 727 { 728 tolerations: []corev1.Toleration{ 729 { 730 TolerationSeconds: &one, 731 }, 732 { 733 TolerationSeconds: &two, 734 }, 735 }, 736 expected: oneSec, 737 }, 738 739 { 740 tolerations: []corev1.Toleration{ 741 { 742 TolerationSeconds: &one, 743 }, 744 { 745 TolerationSeconds: nil, 746 }, 747 }, 748 expected: oneSec, 749 }, 750 { 751 tolerations: []corev1.Toleration{ 752 { 753 TolerationSeconds: nil, 754 }, 755 { 756 TolerationSeconds: &one, 757 }, 758 }, 759 expected: oneSec, 760 }, 761 } 762 763 for _, test := range tests { 764 got := getMinTolerationTime(test.tolerations) 765 if got != test.expected { 766 t.Errorf("Incorrect min toleration time: got %v, expected %v", got, test.expected) 767 } 768 } 769 } 770 771 // TestEventualConsistency verifies if getPodsAssignedToNode returns incomplete data 772 // (e.g. due to watch latency), it will reconcile the remaining pods eventually. 773 // This scenario is partially covered by TestUpdatePods, but given this is an important 774 // property of TaintManager, it's better to have explicit test for this. 775 func TestEventualConsistency(t *testing.T) { 776 testCases := []struct { 777 description string 778 pods []corev1.Pod 779 prevPod *corev1.Pod 780 newPod *corev1.Pod 781 oldNode *corev1.Node 782 newNode *corev1.Node 783 expectPatch bool 784 expectDelete bool 785 }{ 786 { 787 description: "existing pod2 scheduled onto tainted Node", 788 pods: []corev1.Pod{ 789 *testutil.NewPod("pod1", "node1"), 790 }, 791 prevPod: testutil.NewPod("pod2", ""), 792 newPod: testutil.NewPod("pod2", "node1"), 793 oldNode: testutil.NewNode("node1"), 794 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 795 expectPatch: true, 796 expectDelete: true, 797 }, 798 { 799 description: "existing pod2 with taint toleration scheduled onto tainted Node", 800 pods: []corev1.Pod{ 801 *testutil.NewPod("pod1", "node1"), 802 }, 803 prevPod: addToleration(testutil.NewPod("pod2", ""), 1, 100), 804 newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100), 805 oldNode: testutil.NewNode("node1"), 806 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 807 expectPatch: true, 808 expectDelete: true, 809 }, 810 { 811 description: "new pod2 created on tainted Node", 812 pods: []corev1.Pod{ 813 *testutil.NewPod("pod1", "node1"), 814 }, 815 prevPod: nil, 816 newPod: testutil.NewPod("pod2", "node1"), 817 oldNode: testutil.NewNode("node1"), 818 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 819 expectPatch: true, 820 expectDelete: true, 821 }, 822 { 823 description: "new pod2 with tait toleration created on tainted Node", 824 pods: []corev1.Pod{ 825 *testutil.NewPod("pod1", "node1"), 826 }, 827 prevPod: nil, 828 newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100), 829 oldNode: testutil.NewNode("node1"), 830 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 831 expectPatch: true, 832 expectDelete: true, 833 }, 834 } 835 836 for _, item := range testCases { 837 t.Run(item.description, func(t *testing.T) { 838 ctx, cancel := context.WithCancel(context.Background()) 839 defer cancel() 840 841 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 842 controller, podIndexer, nodeIndexer := setupNewController(ctx, fakeClientset) 843 nodeIndexer.Add(item.newNode) 844 controller.recorder = testutil.NewFakeRecorder() 845 go controller.Run(ctx) 846 847 if item.prevPod != nil { 848 podIndexer.Add(item.prevPod) 849 controller.PodUpdated(nil, item.prevPod) 850 } 851 852 // First we simulate NodeUpdate that should delete 'pod1'. It doesn't know about 'pod2' yet. 853 controller.NodeUpdated(item.oldNode, item.newNode) 854 855 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 856 fakeClientset.ClearActions() 857 858 // And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well. 859 podIndexer.Update(item.newPod) 860 controller.PodUpdated(item.prevPod, item.newPod) 861 // wait a bit 862 time.Sleep(timeForControllerToProgressForSanityCheck) 863 }) 864 } 865 } 866 867 func verifyPodActions(t *testing.T, description string, fakeClientset *fake.Clientset, expectPatch, expectDelete bool) { 868 t.Helper() 869 podPatched := false 870 podDeleted := false 871 // use Poll instead of PollImmediate to give some processing time to the controller that the expected 872 // actions are likely to be already sent 873 err := wait.Poll(10*time.Millisecond, 5*time.Second, func() (bool, error) { 874 for _, action := range fakeClientset.Actions() { 875 if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" { 876 podPatched = true 877 } 878 if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" { 879 podDeleted = true 880 } 881 } 882 return podPatched == expectPatch && podDeleted == expectDelete, nil 883 }) 884 if err != nil { 885 t.Errorf("Failed waiting for the expected actions: %q", err) 886 } 887 if podPatched != expectPatch { 888 t.Errorf("[%v]Unexpected test result. Expected patch %v, got %v", description, expectPatch, podPatched) 889 } 890 if podDeleted != expectDelete { 891 t.Errorf("[%v]Unexpected test result. Expected delete %v, got %v", description, expectDelete, podDeleted) 892 } 893 } 894 895 // TestPodDeletionEvent Verify that the output events are as expected 896 func TestPodDeletionEvent(t *testing.T) { 897 f := func(path cmp.Path) bool { 898 switch path.String() { 899 // These fields change at runtime, so ignore it 900 case "LastTimestamp", "FirstTimestamp", "ObjectMeta.Name": 901 return true 902 } 903 return false 904 } 905 906 t.Run("emitPodDeletionEvent", func(t *testing.T) { 907 controller := &Controller{} 908 recorder := testutil.NewFakeRecorder() 909 controller.recorder = recorder 910 controller.emitPodDeletionEvent(types.NamespacedName{ 911 Name: "test", 912 Namespace: "test", 913 }) 914 want := []*corev1.Event{ 915 { 916 ObjectMeta: metav1.ObjectMeta{ 917 Namespace: "test", 918 }, 919 InvolvedObject: corev1.ObjectReference{ 920 Kind: "Pod", 921 APIVersion: "v1", 922 Namespace: "test", 923 Name: "test", 924 }, 925 Reason: "TaintManagerEviction", 926 Type: "Normal", 927 Count: 1, 928 Message: "Marking for deletion Pod test/test", 929 Source: corev1.EventSource{Component: "nodeControllerTest"}, 930 }, 931 } 932 if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { 933 t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff) 934 } 935 }) 936 937 t.Run("emitCancelPodDeletionEvent", func(t *testing.T) { 938 controller := &Controller{} 939 recorder := testutil.NewFakeRecorder() 940 controller.recorder = recorder 941 controller.emitCancelPodDeletionEvent(types.NamespacedName{ 942 Name: "test", 943 Namespace: "test", 944 }) 945 want := []*corev1.Event{ 946 { 947 ObjectMeta: metav1.ObjectMeta{ 948 Namespace: "test", 949 }, 950 InvolvedObject: corev1.ObjectReference{ 951 Kind: "Pod", 952 APIVersion: "v1", 953 Namespace: "test", 954 Name: "test", 955 }, 956 Reason: "TaintManagerEviction", 957 Type: "Normal", 958 Count: 1, 959 Message: "Cancelling deletion of Pod test/test", 960 Source: corev1.EventSource{Component: "nodeControllerTest"}, 961 }, 962 } 963 if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { 964 t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff) 965 } 966 }) 967 }