k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/tainteviction/taint_eviction_test.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tainteviction 18 19 import ( 20 "context" 21 "fmt" 22 goruntime "runtime" 23 "sort" 24 "testing" 25 "time" 26 27 "github.com/google/go-cmp/cmp" 28 29 corev1 "k8s.io/api/core/v1" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/fields" 32 "k8s.io/apimachinery/pkg/labels" 33 "k8s.io/apimachinery/pkg/types" 34 "k8s.io/apimachinery/pkg/util/wait" 35 "k8s.io/apiserver/pkg/util/feature" 36 "k8s.io/client-go/informers" 37 "k8s.io/client-go/kubernetes/fake" 38 clienttesting "k8s.io/client-go/testing" 39 "k8s.io/client-go/tools/cache" 40 featuregatetesting "k8s.io/component-base/featuregate/testing" 41 "k8s.io/kubernetes/pkg/controller/testutil" 42 "k8s.io/kubernetes/pkg/features" 43 ) 44 45 var timeForControllerToProgressForSanityCheck = 20 * time.Millisecond 46 47 func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNodeNameFunc { 48 return func(nodeName string) ([]*corev1.Pod, error) { 49 selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}) 50 pods, err := c.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{ 51 FieldSelector: selector.String(), 52 LabelSelector: labels.Everything().String(), 53 }) 54 if err != nil { 55 return []*corev1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName) 56 } 57 rPods := make([]*corev1.Pod, len(pods.Items)) 58 for i := range pods.Items { 59 rPods[i] = &pods.Items[i] 60 } 61 return rPods, nil 62 } 63 } 64 65 func createNoExecuteTaint(index int) corev1.Taint { 66 now := metav1.Now() 67 return corev1.Taint{ 68 Key: "testTaint" + fmt.Sprintf("%v", index), 69 Value: "test" + fmt.Sprintf("%v", index), 70 Effect: corev1.TaintEffectNoExecute, 71 TimeAdded: &now, 72 } 73 } 74 75 func addToleration(pod *corev1.Pod, index int, duration int64) *corev1.Pod { 76 if pod.Annotations == nil { 77 pod.Annotations = map[string]string{} 78 } 79 if duration < 0 { 80 pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute}} 81 82 } else { 83 pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &duration}} 84 } 85 return pod 86 } 87 88 func addTaintsToNode(node *corev1.Node, key, value string, indices []int) *corev1.Node { 89 taints := []corev1.Taint{} 90 for _, index := range indices { 91 taints = append(taints, createNoExecuteTaint(index)) 92 } 93 node.Spec.Taints = taints 94 return node 95 } 96 97 var alwaysReady = func() bool { return true } 98 99 func setupNewController(ctx context.Context, fakeClientSet *fake.Clientset) (*Controller, cache.Indexer, cache.Indexer) { 100 informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0) 101 podIndexer := informerFactory.Core().V1().Pods().Informer().GetIndexer() 102 nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer() 103 mgr, _ := New(ctx, fakeClientSet, informerFactory.Core().V1().Pods(), informerFactory.Core().V1().Nodes(), "taint-eviction-controller") 104 mgr.podListerSynced = alwaysReady 105 mgr.nodeListerSynced = alwaysReady 106 mgr.getPodsAssignedToNode = getPodsAssignedToNode(ctx, fakeClientSet) 107 return mgr, podIndexer, nodeIndexer 108 } 109 110 type timestampedPod struct { 111 names []string 112 timestamp time.Duration 113 } 114 115 type durationSlice []timestampedPod 116 117 func (a durationSlice) Len() int { return len(a) } 118 func (a durationSlice) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 119 func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp } 120 121 func TestFilterNoExecuteTaints(t *testing.T) { 122 taints := []corev1.Taint{ 123 { 124 Key: "one", 125 Value: "one", 126 Effect: corev1.TaintEffectNoExecute, 127 }, 128 { 129 Key: "two", 130 Value: "two", 131 Effect: corev1.TaintEffectNoSchedule, 132 }, 133 } 134 taints = getNoExecuteTaints(taints) 135 if len(taints) != 1 || taints[0].Key != "one" { 136 t.Errorf("Filtering doesn't work. Got %v", taints) 137 } 138 } 139 140 func TestCreatePod(t *testing.T) { 141 testCases := []struct { 142 description string 143 pod *corev1.Pod 144 taintedNodes map[string][]corev1.Taint 145 expectPatch bool 146 expectDelete bool 147 enablePodDisruptionConditions bool 148 }{ 149 { 150 description: "not scheduled - ignore", 151 pod: testutil.NewPod("pod1", ""), 152 taintedNodes: map[string][]corev1.Taint{}, 153 expectDelete: false, 154 }, 155 { 156 description: "scheduled on untainted Node", 157 pod: testutil.NewPod("pod1", "node1"), 158 taintedNodes: map[string][]corev1.Taint{}, 159 expectDelete: false, 160 }, 161 { 162 description: "schedule on tainted Node", 163 pod: testutil.NewPod("pod1", "node1"), 164 taintedNodes: map[string][]corev1.Taint{ 165 "node1": {createNoExecuteTaint(1)}, 166 }, 167 expectDelete: true, 168 }, 169 { 170 description: "schedule on tainted Node; PodDisruptionConditions enabled", 171 pod: testutil.NewPod("pod1", "node1"), 172 taintedNodes: map[string][]corev1.Taint{ 173 "node1": {createNoExecuteTaint(1)}, 174 }, 175 expectPatch: true, 176 expectDelete: true, 177 enablePodDisruptionConditions: true, 178 }, 179 { 180 description: "schedule on tainted Node with finite toleration", 181 pod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 182 taintedNodes: map[string][]corev1.Taint{ 183 "node1": {createNoExecuteTaint(1)}, 184 }, 185 expectDelete: false, 186 }, 187 { 188 description: "schedule on tainted Node with infinite toleration", 189 pod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 190 taintedNodes: map[string][]corev1.Taint{ 191 "node1": {createNoExecuteTaint(1)}, 192 }, 193 expectDelete: false, 194 }, 195 { 196 description: "schedule on tainted Node with infinite ivalid toleration", 197 pod: addToleration(testutil.NewPod("pod1", "node1"), 2, -1), 198 taintedNodes: map[string][]corev1.Taint{ 199 "node1": {createNoExecuteTaint(1)}, 200 }, 201 expectDelete: true, 202 }, 203 } 204 205 for _, item := range testCases { 206 t.Run(item.description, func(t *testing.T) { 207 featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions) 208 ctx, cancel := context.WithCancel(context.Background()) 209 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.pod}}) 210 controller, podIndexer, _ := setupNewController(ctx, fakeClientset) 211 controller.recorder = testutil.NewFakeRecorder() 212 go controller.Run(ctx) 213 controller.taintedNodes = item.taintedNodes 214 215 podIndexer.Add(item.pod) 216 controller.PodUpdated(nil, item.pod) 217 218 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 219 220 cancel() 221 }) 222 } 223 } 224 225 func TestDeletePod(t *testing.T) { 226 ctx, cancel := context.WithCancel(context.Background()) 227 defer cancel() 228 229 fakeClientset := fake.NewSimpleClientset() 230 controller, _, _ := setupNewController(ctx, fakeClientset) 231 controller.recorder = testutil.NewFakeRecorder() 232 go controller.Run(ctx) 233 controller.taintedNodes = map[string][]corev1.Taint{ 234 "node1": {createNoExecuteTaint(1)}, 235 } 236 controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil) 237 // wait a bit to see if nothing will panic 238 time.Sleep(timeForControllerToProgressForSanityCheck) 239 } 240 241 func TestUpdatePod(t *testing.T) { 242 testCases := []struct { 243 description string 244 prevPod *corev1.Pod 245 awaitForScheduledEviction bool 246 newPod *corev1.Pod 247 taintedNodes map[string][]corev1.Taint 248 expectPatch bool 249 expectDelete bool 250 enablePodDisruptionConditions bool 251 skipOnWindows bool 252 }{ 253 { 254 description: "scheduling onto tainted Node results in patch and delete when PodDisruptionConditions enabled", 255 prevPod: testutil.NewPod("pod1", ""), 256 newPod: testutil.NewPod("pod1", "node1"), 257 taintedNodes: map[string][]corev1.Taint{ 258 "node1": {createNoExecuteTaint(1)}, 259 }, 260 expectPatch: true, 261 expectDelete: true, 262 enablePodDisruptionConditions: true, 263 }, 264 { 265 description: "scheduling onto tainted Node", 266 prevPod: testutil.NewPod("pod1", ""), 267 newPod: testutil.NewPod("pod1", "node1"), 268 taintedNodes: map[string][]corev1.Taint{ 269 "node1": {createNoExecuteTaint(1)}, 270 }, 271 expectDelete: true, 272 }, 273 { 274 description: "scheduling onto tainted Node with toleration", 275 prevPod: addToleration(testutil.NewPod("pod1", ""), 1, -1), 276 newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 277 taintedNodes: map[string][]corev1.Taint{ 278 "node1": {createNoExecuteTaint(1)}, 279 }, 280 expectDelete: false, 281 }, 282 { 283 description: "removing toleration", 284 prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 285 newPod: testutil.NewPod("pod1", "node1"), 286 awaitForScheduledEviction: true, 287 taintedNodes: map[string][]corev1.Taint{ 288 "node1": {createNoExecuteTaint(1)}, 289 }, 290 expectDelete: true, 291 }, 292 { 293 description: "lengthening toleration shouldn't work", 294 prevPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 1), 295 newPod: addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 296 awaitForScheduledEviction: true, 297 taintedNodes: map[string][]corev1.Taint{ 298 "node1": {createNoExecuteTaint(1)}, 299 }, 300 expectDelete: true, 301 skipOnWindows: true, 302 }, 303 } 304 305 for _, item := range testCases { 306 t.Run(item.description, func(t *testing.T) { 307 if item.skipOnWindows && goruntime.GOOS == "windows" { 308 // TODO: remove skip once the flaking test has been fixed. 309 t.Skip("Skip flaking test on Windows.") 310 } 311 featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions) 312 ctx, cancel := context.WithCancel(context.Background()) 313 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.prevPod}}) 314 controller, podIndexer, _ := setupNewController(context.TODO(), fakeClientset) 315 controller.recorder = testutil.NewFakeRecorder() 316 controller.taintedNodes = item.taintedNodes 317 go controller.Run(ctx) 318 319 podIndexer.Add(item.prevPod) 320 controller.PodUpdated(nil, item.prevPod) 321 322 if item.awaitForScheduledEviction { 323 nsName := types.NamespacedName{Namespace: item.prevPod.Namespace, Name: item.prevPod.Name} 324 err := wait.PollImmediate(time.Millisecond*10, time.Second, func() (bool, error) { 325 scheduledEviction := controller.taintEvictionQueue.GetWorkerUnsafe(nsName.String()) 326 return scheduledEviction != nil, nil 327 }) 328 if err != nil { 329 t.Fatalf("Failed to await for scheduled eviction: %q", err) 330 } 331 } 332 333 podIndexer.Update(item.newPod) 334 controller.PodUpdated(item.prevPod, item.newPod) 335 336 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 337 cancel() 338 }) 339 } 340 } 341 342 func TestCreateNode(t *testing.T) { 343 testCases := []struct { 344 description string 345 pods []corev1.Pod 346 node *corev1.Node 347 expectPatch bool 348 expectDelete bool 349 }{ 350 { 351 description: "Creating Node matching already assigned Pod", 352 pods: []corev1.Pod{ 353 *testutil.NewPod("pod1", "node1"), 354 }, 355 node: testutil.NewNode("node1"), 356 expectPatch: false, 357 expectDelete: false, 358 }, 359 { 360 description: "Creating tainted Node matching already assigned Pod", 361 pods: []corev1.Pod{ 362 *testutil.NewPod("pod1", "node1"), 363 }, 364 node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 365 expectPatch: true, 366 expectDelete: true, 367 }, 368 { 369 description: "Creating tainted Node matching already assigned tolerating Pod", 370 pods: []corev1.Pod{ 371 *addToleration(testutil.NewPod("pod1", "node1"), 1, -1), 372 }, 373 node: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 374 expectPatch: false, 375 expectDelete: false, 376 }, 377 } 378 379 for _, item := range testCases { 380 ctx, cancel := context.WithCancel(context.Background()) 381 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 382 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 383 nodeIndexer.Add(item.node) 384 controller.recorder = testutil.NewFakeRecorder() 385 go controller.Run(ctx) 386 controller.NodeUpdated(nil, item.node) 387 388 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 389 390 cancel() 391 } 392 } 393 394 func TestDeleteNode(t *testing.T) { 395 ctx, cancel := context.WithCancel(context.Background()) 396 fakeClientset := fake.NewSimpleClientset() 397 controller, _, _ := setupNewController(ctx, fakeClientset) 398 controller.recorder = testutil.NewFakeRecorder() 399 controller.taintedNodes = map[string][]corev1.Taint{ 400 "node1": {createNoExecuteTaint(1)}, 401 } 402 go controller.Run(ctx) 403 controller.NodeUpdated(testutil.NewNode("node1"), nil) 404 405 // await until controller.taintedNodes is empty 406 err := wait.PollImmediate(10*time.Millisecond, time.Second, func() (bool, error) { 407 controller.taintedNodesLock.Lock() 408 defer controller.taintedNodesLock.Unlock() 409 _, ok := controller.taintedNodes["node1"] 410 return !ok, nil 411 }) 412 if err != nil { 413 t.Errorf("Failed to await for processing node deleted: %q", err) 414 } 415 cancel() 416 } 417 418 func TestUpdateNode(t *testing.T) { 419 testCases := []struct { 420 description string 421 pods []corev1.Pod 422 oldNode *corev1.Node 423 newNode *corev1.Node 424 expectPatch bool 425 expectDelete bool 426 additionalSleep time.Duration 427 enablePodDisruptionConditions bool 428 }{ 429 { 430 description: "Added taint, expect node patched and deleted when PodDisruptionConditions is enabled", 431 pods: []corev1.Pod{ 432 *testutil.NewPod("pod1", "node1"), 433 }, 434 oldNode: testutil.NewNode("node1"), 435 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 436 expectPatch: true, 437 expectDelete: true, 438 enablePodDisruptionConditions: true, 439 }, 440 { 441 description: "Added taint", 442 pods: []corev1.Pod{ 443 *testutil.NewPod("pod1", "node1"), 444 }, 445 oldNode: testutil.NewNode("node1"), 446 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 447 expectDelete: true, 448 }, 449 { 450 description: "Added tolerated taint", 451 pods: []corev1.Pod{ 452 *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 453 }, 454 oldNode: testutil.NewNode("node1"), 455 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 456 expectDelete: false, 457 }, 458 { 459 description: "Only one added taint tolerated", 460 pods: []corev1.Pod{ 461 *addToleration(testutil.NewPod("pod1", "node1"), 1, 100), 462 }, 463 oldNode: testutil.NewNode("node1"), 464 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 465 expectDelete: true, 466 }, 467 { 468 description: "Taint removed", 469 pods: []corev1.Pod{ 470 *addToleration(testutil.NewPod("pod1", "node1"), 1, 1), 471 }, 472 oldNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 473 newNode: testutil.NewNode("node1"), 474 expectDelete: false, 475 additionalSleep: 1500 * time.Millisecond, 476 }, 477 { 478 description: "Pod with multiple tolerations are evicted when first one runs out", 479 pods: []corev1.Pod{ 480 { 481 ObjectMeta: metav1.ObjectMeta{ 482 Namespace: "default", 483 Name: "pod1", 484 }, 485 Spec: corev1.PodSpec{ 486 NodeName: "node1", 487 Tolerations: []corev1.Toleration{ 488 {Key: "testTaint1", Value: "test1", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]}, 489 {Key: "testTaint2", Value: "test2", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]}, 490 }, 491 }, 492 Status: corev1.PodStatus{ 493 Conditions: []corev1.PodCondition{ 494 { 495 Type: corev1.PodReady, 496 Status: corev1.ConditionTrue, 497 }, 498 }, 499 }, 500 }, 501 }, 502 oldNode: testutil.NewNode("node1"), 503 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 504 expectDelete: true, 505 }, 506 } 507 508 for _, item := range testCases { 509 t.Run(item.description, func(t *testing.T) { 510 featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions) 511 ctx, cancel := context.WithCancel(context.Background()) 512 defer cancel() 513 514 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 515 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 516 nodeIndexer.Add(item.newNode) 517 controller.recorder = testutil.NewFakeRecorder() 518 go controller.Run(ctx) 519 controller.NodeUpdated(item.oldNode, item.newNode) 520 521 if item.additionalSleep > 0 { 522 time.Sleep(item.additionalSleep) 523 } 524 525 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 526 }) 527 } 528 } 529 530 func TestUpdateNodeWithMultipleTaints(t *testing.T) { 531 taint1 := createNoExecuteTaint(1) 532 taint2 := createNoExecuteTaint(2) 533 534 minute := int64(60) 535 pod := testutil.NewPod("pod1", "node1") 536 pod.Spec.Tolerations = []corev1.Toleration{ 537 {Key: taint1.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute}, 538 {Key: taint2.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &minute}, 539 } 540 podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} 541 542 untaintedNode := testutil.NewNode("node1") 543 544 doubleTaintedNode := testutil.NewNode("node1") 545 doubleTaintedNode.Spec.Taints = []corev1.Taint{taint1, taint2} 546 547 singleTaintedNode := testutil.NewNode("node1") 548 singleTaintedNode.Spec.Taints = []corev1.Taint{taint1} 549 550 ctx, cancel := context.WithCancel(context.TODO()) 551 fakeClientset := fake.NewSimpleClientset(pod) 552 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 553 controller.recorder = testutil.NewFakeRecorder() 554 go controller.Run(ctx) 555 556 // no taint 557 nodeIndexer.Add(untaintedNode) 558 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 559 // verify pod is not queued for deletion 560 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 561 t.Fatalf("pod queued for deletion with no taints") 562 } 563 564 // no taint -> infinitely tolerated taint 565 nodeIndexer.Update(singleTaintedNode) 566 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 567 // verify pod is not queued for deletion 568 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 569 t.Fatalf("pod queued for deletion with permanently tolerated taint") 570 } 571 572 // infinitely tolerated taint -> temporarily tolerated taint 573 nodeIndexer.Update(doubleTaintedNode) 574 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 575 // verify pod is queued for deletion 576 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil { 577 t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint") 578 } 579 580 // temporarily tolerated taint -> infinitely tolerated taint 581 nodeIndexer.Update(singleTaintedNode) 582 controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"}) 583 // verify pod is not queued for deletion 584 if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil { 585 t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint") 586 } 587 588 // verify pod is not deleted 589 for _, action := range fakeClientset.Actions() { 590 if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" { 591 t.Error("Unexpected deletion") 592 } 593 } 594 cancel() 595 } 596 597 func TestUpdateNodeWithMultiplePods(t *testing.T) { 598 testCases := []struct { 599 description string 600 pods []corev1.Pod 601 oldNode *corev1.Node 602 newNode *corev1.Node 603 expectedDeleteTimes durationSlice 604 }{ 605 { 606 description: "Pods with different toleration times are evicted appropriately", 607 pods: []corev1.Pod{ 608 *testutil.NewPod("pod1", "node1"), 609 *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), 610 *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), 611 }, 612 oldNode: testutil.NewNode("node1"), 613 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 614 expectedDeleteTimes: durationSlice{ 615 {[]string{"pod1"}, 0}, 616 {[]string{"pod2"}, time.Second}, 617 }, 618 }, 619 { 620 description: "Evict all pods not matching all taints instantly", 621 pods: []corev1.Pod{ 622 *testutil.NewPod("pod1", "node1"), 623 *addToleration(testutil.NewPod("pod2", "node1"), 1, 1), 624 *addToleration(testutil.NewPod("pod3", "node1"), 1, -1), 625 }, 626 oldNode: testutil.NewNode("node1"), 627 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}), 628 expectedDeleteTimes: durationSlice{ 629 {[]string{"pod1", "pod2", "pod3"}, 0}, 630 }, 631 }, 632 } 633 634 for _, item := range testCases { 635 t.Run(item.description, func(t *testing.T) { 636 t.Logf("Starting testcase %q", item.description) 637 ctx, cancel := context.WithCancel(context.Background()) 638 defer cancel() 639 640 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 641 sort.Sort(item.expectedDeleteTimes) 642 controller, _, nodeIndexer := setupNewController(ctx, fakeClientset) 643 nodeIndexer.Add(item.newNode) 644 controller.recorder = testutil.NewFakeRecorder() 645 go controller.Run(ctx) 646 controller.NodeUpdated(item.oldNode, item.newNode) 647 648 startedAt := time.Now() 649 for i := range item.expectedDeleteTimes { 650 if i == 0 || item.expectedDeleteTimes[i-1].timestamp != item.expectedDeleteTimes[i].timestamp { 651 // compute a grace duration to give controller time to process updates. Choose big 652 // enough intervals in the test cases above to avoid flakes. 653 var increment time.Duration 654 if i == len(item.expectedDeleteTimes)-1 || item.expectedDeleteTimes[i+1].timestamp == item.expectedDeleteTimes[i].timestamp { 655 increment = 500 * time.Millisecond 656 } else { 657 increment = ((item.expectedDeleteTimes[i+1].timestamp - item.expectedDeleteTimes[i].timestamp) / time.Duration(2)) 658 } 659 660 sleepTime := item.expectedDeleteTimes[i].timestamp - time.Since(startedAt) + increment 661 if sleepTime < 0 { 662 sleepTime = 0 663 } 664 t.Logf("Sleeping for %v", sleepTime) 665 time.Sleep(sleepTime) 666 } 667 668 for delay, podName := range item.expectedDeleteTimes[i].names { 669 deleted := false 670 for _, action := range fakeClientset.Actions() { 671 deleteAction, ok := action.(clienttesting.DeleteActionImpl) 672 if !ok { 673 t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb()) 674 continue 675 } 676 if deleteAction.GetResource().Resource != "pods" { 677 continue 678 } 679 if podName == deleteAction.GetName() { 680 deleted = true 681 } 682 } 683 if !deleted { 684 t.Errorf("Failed to deleted pod %v after %v", podName, delay) 685 } 686 } 687 for _, action := range fakeClientset.Actions() { 688 deleteAction, ok := action.(clienttesting.DeleteActionImpl) 689 if !ok { 690 t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb()) 691 continue 692 } 693 if deleteAction.GetResource().Resource != "pods" { 694 continue 695 } 696 deletedPodName := deleteAction.GetName() 697 expected := false 698 for _, podName := range item.expectedDeleteTimes[i].names { 699 if podName == deletedPodName { 700 expected = true 701 } 702 } 703 if !expected { 704 t.Errorf("Pod %v was deleted even though it shouldn't have", deletedPodName) 705 } 706 } 707 fakeClientset.ClearActions() 708 } 709 }) 710 } 711 } 712 713 func TestGetMinTolerationTime(t *testing.T) { 714 one := int64(1) 715 two := int64(2) 716 oneSec := 1 * time.Second 717 718 tests := []struct { 719 tolerations []corev1.Toleration 720 expected time.Duration 721 }{ 722 { 723 tolerations: []corev1.Toleration{}, 724 expected: 0, 725 }, 726 { 727 tolerations: []corev1.Toleration{ 728 { 729 TolerationSeconds: nil, 730 }, 731 }, 732 expected: -1, 733 }, 734 { 735 tolerations: []corev1.Toleration{ 736 { 737 TolerationSeconds: &one, 738 }, 739 { 740 TolerationSeconds: &two, 741 }, 742 }, 743 expected: oneSec, 744 }, 745 746 { 747 tolerations: []corev1.Toleration{ 748 { 749 TolerationSeconds: &one, 750 }, 751 { 752 TolerationSeconds: nil, 753 }, 754 }, 755 expected: oneSec, 756 }, 757 { 758 tolerations: []corev1.Toleration{ 759 { 760 TolerationSeconds: nil, 761 }, 762 { 763 TolerationSeconds: &one, 764 }, 765 }, 766 expected: oneSec, 767 }, 768 } 769 770 for _, test := range tests { 771 got := getMinTolerationTime(test.tolerations) 772 if got != test.expected { 773 t.Errorf("Incorrect min toleration time: got %v, expected %v", got, test.expected) 774 } 775 } 776 } 777 778 // TestEventualConsistency verifies if getPodsAssignedToNode returns incomplete data 779 // (e.g. due to watch latency), it will reconcile the remaining pods eventually. 780 // This scenario is partially covered by TestUpdatePods, but given this is an important 781 // property of TaintManager, it's better to have explicit test for this. 782 func TestEventualConsistency(t *testing.T) { 783 testCases := []struct { 784 description string 785 pods []corev1.Pod 786 prevPod *corev1.Pod 787 newPod *corev1.Pod 788 oldNode *corev1.Node 789 newNode *corev1.Node 790 expectPatch bool 791 expectDelete bool 792 }{ 793 { 794 description: "existing pod2 scheduled onto tainted Node", 795 pods: []corev1.Pod{ 796 *testutil.NewPod("pod1", "node1"), 797 }, 798 prevPod: testutil.NewPod("pod2", ""), 799 newPod: testutil.NewPod("pod2", "node1"), 800 oldNode: testutil.NewNode("node1"), 801 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 802 expectPatch: true, 803 expectDelete: true, 804 }, 805 { 806 description: "existing pod2 with taint toleration scheduled onto tainted Node", 807 pods: []corev1.Pod{ 808 *testutil.NewPod("pod1", "node1"), 809 }, 810 prevPod: addToleration(testutil.NewPod("pod2", ""), 1, 100), 811 newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100), 812 oldNode: testutil.NewNode("node1"), 813 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 814 expectPatch: true, 815 expectDelete: true, 816 }, 817 { 818 description: "new pod2 created on tainted Node", 819 pods: []corev1.Pod{ 820 *testutil.NewPod("pod1", "node1"), 821 }, 822 prevPod: nil, 823 newPod: testutil.NewPod("pod2", "node1"), 824 oldNode: testutil.NewNode("node1"), 825 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 826 expectPatch: true, 827 expectDelete: true, 828 }, 829 { 830 description: "new pod2 with tait toleration created on tainted Node", 831 pods: []corev1.Pod{ 832 *testutil.NewPod("pod1", "node1"), 833 }, 834 prevPod: nil, 835 newPod: addToleration(testutil.NewPod("pod2", "node1"), 1, 100), 836 oldNode: testutil.NewNode("node1"), 837 newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}), 838 expectPatch: true, 839 expectDelete: true, 840 }, 841 } 842 843 for _, item := range testCases { 844 t.Run(item.description, func(t *testing.T) { 845 ctx, cancel := context.WithCancel(context.Background()) 846 defer cancel() 847 848 fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods}) 849 controller, podIndexer, nodeIndexer := setupNewController(ctx, fakeClientset) 850 nodeIndexer.Add(item.newNode) 851 controller.recorder = testutil.NewFakeRecorder() 852 go controller.Run(ctx) 853 854 if item.prevPod != nil { 855 podIndexer.Add(item.prevPod) 856 controller.PodUpdated(nil, item.prevPod) 857 } 858 859 // First we simulate NodeUpdate that should delete 'pod1'. It doesn't know about 'pod2' yet. 860 controller.NodeUpdated(item.oldNode, item.newNode) 861 862 verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete) 863 fakeClientset.ClearActions() 864 865 // And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well. 866 podIndexer.Update(item.newPod) 867 controller.PodUpdated(item.prevPod, item.newPod) 868 // wait a bit 869 time.Sleep(timeForControllerToProgressForSanityCheck) 870 }) 871 } 872 } 873 874 func verifyPodActions(t *testing.T, description string, fakeClientset *fake.Clientset, expectPatch, expectDelete bool) { 875 t.Helper() 876 podPatched := false 877 podDeleted := false 878 // use Poll instead of PollImmediate to give some processing time to the controller that the expected 879 // actions are likely to be already sent 880 err := wait.Poll(10*time.Millisecond, 5*time.Second, func() (bool, error) { 881 for _, action := range fakeClientset.Actions() { 882 if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" { 883 podPatched = true 884 } 885 if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" { 886 podDeleted = true 887 } 888 } 889 return podPatched == expectPatch && podDeleted == expectDelete, nil 890 }) 891 if err != nil { 892 t.Errorf("Failed waiting for the expected actions: %q", err) 893 } 894 if podPatched != expectPatch { 895 t.Errorf("[%v]Unexpected test result. Expected patch %v, got %v", description, expectPatch, podPatched) 896 } 897 if podDeleted != expectDelete { 898 t.Errorf("[%v]Unexpected test result. Expected delete %v, got %v", description, expectDelete, podDeleted) 899 } 900 } 901 902 // TestPodDeletionEvent Verify that the output events are as expected 903 func TestPodDeletionEvent(t *testing.T) { 904 f := func(path cmp.Path) bool { 905 switch path.String() { 906 // These fields change at runtime, so ignore it 907 case "LastTimestamp", "FirstTimestamp", "ObjectMeta.Name": 908 return true 909 } 910 return false 911 } 912 913 t.Run("emitPodDeletionEvent", func(t *testing.T) { 914 controller := &Controller{} 915 recorder := testutil.NewFakeRecorder() 916 controller.recorder = recorder 917 controller.emitPodDeletionEvent(types.NamespacedName{ 918 Name: "test", 919 Namespace: "test", 920 }) 921 want := []*corev1.Event{ 922 { 923 ObjectMeta: metav1.ObjectMeta{ 924 Namespace: "test", 925 }, 926 InvolvedObject: corev1.ObjectReference{ 927 Kind: "Pod", 928 APIVersion: "v1", 929 Namespace: "test", 930 Name: "test", 931 }, 932 Reason: "TaintManagerEviction", 933 Type: "Normal", 934 Count: 1, 935 Message: "Marking for deletion Pod test/test", 936 Source: corev1.EventSource{Component: "nodeControllerTest"}, 937 }, 938 } 939 if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { 940 t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff) 941 } 942 }) 943 944 t.Run("emitCancelPodDeletionEvent", func(t *testing.T) { 945 controller := &Controller{} 946 recorder := testutil.NewFakeRecorder() 947 controller.recorder = recorder 948 controller.emitCancelPodDeletionEvent(types.NamespacedName{ 949 Name: "test", 950 Namespace: "test", 951 }) 952 want := []*corev1.Event{ 953 { 954 ObjectMeta: metav1.ObjectMeta{ 955 Namespace: "test", 956 }, 957 InvolvedObject: corev1.ObjectReference{ 958 Kind: "Pod", 959 APIVersion: "v1", 960 Namespace: "test", 961 Name: "test", 962 }, 963 Reason: "TaintManagerEviction", 964 Type: "Normal", 965 Count: 1, 966 Message: "Cancelling deletion of Pod test/test", 967 Source: corev1.EventSource{Component: "nodeControllerTest"}, 968 }, 969 } 970 if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 { 971 t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff) 972 } 973 }) 974 }