/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	// ensure libs have a chance to initialize
	_ "github.com/stretchr/testify/assert"
)

var (
	// pauseImage is the registry-qualified pause image used for every test pod below.
	pauseImage = imageutils.GetE2EImage(imageutils.Pause)
)

const (
	// testFinalizer keeps an evicted pod object around long enough for the test
	// to inspect its status; the test removes it in a cleanup step.
	testFinalizer = "example.com/test-finalizer"
)

// getTestTaint returns the NoExecute taint used throughout this file, with
// TimeAdded stamped at the moment of the call.
func getTestTaint() v1.Taint {
	now := metav1.Now()
	return v1.Taint{
		Key:       "kubernetes.io/e2e-evict-taint-key",
		Value:     "evictTaintVal",
		Effect:    v1.TaintEffectNoExecute,
		TimeAdded: &now,
	}
}

// Create a default pod for this test, with argument saying if the Pod should have
// toleration for Taints used in this test.
62 func createPodForTaintsTest(hasToleration bool, tolerationSeconds int, podName, podLabel, ns string) *v1.Pod { 63 grace := int64(1) 64 if !hasToleration { 65 return &v1.Pod{ 66 ObjectMeta: metav1.ObjectMeta{ 67 Name: podName, 68 Namespace: ns, 69 Labels: map[string]string{"group": podLabel}, 70 DeletionGracePeriodSeconds: &grace, 71 }, 72 Spec: v1.PodSpec{ 73 Containers: []v1.Container{ 74 { 75 Name: "pause", 76 Image: pauseImage, 77 }, 78 }, 79 }, 80 } 81 } 82 if tolerationSeconds <= 0 { 83 return &v1.Pod{ 84 ObjectMeta: metav1.ObjectMeta{ 85 Name: podName, 86 Namespace: ns, 87 Labels: map[string]string{"group": podLabel}, 88 DeletionGracePeriodSeconds: &grace, 89 // default - tolerate forever 90 }, 91 Spec: v1.PodSpec{ 92 Containers: []v1.Container{ 93 { 94 Name: "pause", 95 Image: pauseImage, 96 }, 97 }, 98 Tolerations: []v1.Toleration{{Key: "kubernetes.io/e2e-evict-taint-key", Value: "evictTaintVal", Effect: v1.TaintEffectNoExecute}}, 99 }, 100 } 101 } 102 ts := int64(tolerationSeconds) 103 return &v1.Pod{ 104 ObjectMeta: metav1.ObjectMeta{ 105 Name: podName, 106 Namespace: ns, 107 Labels: map[string]string{"group": podLabel}, 108 DeletionGracePeriodSeconds: &grace, 109 }, 110 Spec: v1.PodSpec{ 111 Containers: []v1.Container{ 112 { 113 Name: "pause", 114 Image: pauseImage, 115 }, 116 }, 117 // default - tolerate forever 118 Tolerations: []v1.Toleration{{Key: "kubernetes.io/e2e-evict-taint-key", Value: "evictTaintVal", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &ts}}, 119 }, 120 } 121 } 122 123 // Creates and starts a controller (informer) that watches updates on a pod in given namespace with given name. It puts a new 124 // struct into observedDeletion channel for every deletion it sees. 
125 func createTestController(ctx context.Context, cs clientset.Interface, observedDeletions chan string, podLabel, ns string) { 126 _, controller := cache.NewInformer( 127 &cache.ListWatch{ 128 ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 129 options.LabelSelector = labels.SelectorFromSet(labels.Set{"group": podLabel}).String() 130 obj, err := cs.CoreV1().Pods(ns).List(ctx, options) 131 return runtime.Object(obj), err 132 }, 133 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 134 options.LabelSelector = labels.SelectorFromSet(labels.Set{"group": podLabel}).String() 135 return cs.CoreV1().Pods(ns).Watch(ctx, options) 136 }, 137 }, 138 &v1.Pod{}, 139 0, 140 cache.ResourceEventHandlerFuncs{ 141 DeleteFunc: func(oldObj interface{}) { 142 if delPod, ok := oldObj.(*v1.Pod); ok { 143 observedDeletions <- delPod.Name 144 } else { 145 observedDeletions <- "" 146 } 147 }, 148 }, 149 ) 150 framework.Logf("Starting informer...") 151 go controller.Run(ctx.Done()) 152 } 153 154 const ( 155 kubeletPodDeletionDelaySeconds = 60 156 additionalWaitPerDeleteSeconds = 5 157 ) 158 159 // Tests the behavior of NoExecuteTaintManager. Following scenarios are included: 160 // - eviction of non-tolerating pods from a tainted node, 161 // - lack of eviction of tolerating pods from a tainted node, 162 // - delayed eviction of short-tolerating pod from a tainted node, 163 // - lack of eviction of short-tolerating pod after taint removal. 
164 var _ = SIGDescribe("NoExecuteTaintManager Single Pod", framework.WithSerial(), func() { 165 var cs clientset.Interface 166 var ns string 167 f := framework.NewDefaultFramework("taint-single-pod") 168 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 169 170 ginkgo.BeforeEach(func(ctx context.Context) { 171 cs = f.ClientSet 172 ns = f.Namespace.Name 173 174 e2enode.WaitForTotalHealthy(ctx, cs, time.Minute) 175 176 err := framework.CheckTestingNSDeletedExcept(ctx, cs, ns) 177 framework.ExpectNoError(err) 178 }) 179 180 // 1. Run a pod 181 // 2. Taint the node running this pod with a no-execute taint 182 // 3. See if pod will get evicted 183 ginkgo.It("evicts pods from tainted nodes", func(ctx context.Context) { 184 podName := "taint-eviction-1" 185 pod := createPodForTaintsTest(false, 0, podName, podName, ns) 186 observedDeletions := make(chan string, 100) 187 createTestController(ctx, cs, observedDeletions, podName, ns) 188 189 ginkgo.By("Starting pod...") 190 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 191 framework.ExpectNoError(err) 192 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 193 194 ginkgo.By("Trying to apply a taint on the Node") 195 testTaint := getTestTaint() 196 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 197 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 198 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 199 200 // Wait a bit 201 ginkgo.By("Waiting for Pod to be deleted") 202 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 203 select { 204 case <-timeoutChannel: 205 framework.Failf("Failed to evict Pod") 206 case <-observedDeletions: 207 framework.Logf("Noticed Pod eviction. Test successful") 208 } 209 }) 210 211 // 1. Run a pod with toleration 212 // 2. Taint the node running this pod with a no-execute taint 213 // 3. 
See if pod won't get evicted 214 ginkgo.It("doesn't evict pod with tolerations from tainted nodes", func(ctx context.Context) { 215 podName := "taint-eviction-2" 216 pod := createPodForTaintsTest(true, 0, podName, podName, ns) 217 observedDeletions := make(chan string, 100) 218 createTestController(ctx, cs, observedDeletions, podName, ns) 219 220 ginkgo.By("Starting pod...") 221 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 222 framework.ExpectNoError(err) 223 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 224 225 ginkgo.By("Trying to apply a taint on the Node") 226 testTaint := getTestTaint() 227 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 228 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 229 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 230 231 // Wait a bit 232 ginkgo.By("Waiting for Pod to be deleted") 233 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 234 select { 235 case <-timeoutChannel: 236 framework.Logf("Pod wasn't evicted. Test successful") 237 case <-observedDeletions: 238 framework.Failf("Pod was evicted despite toleration") 239 } 240 }) 241 242 // 1. Run a pod with a finite toleration 243 // 2. Taint the node running this pod with a no-execute taint 244 // 3. See if pod won't get evicted before toleration time runs out 245 // 4. 
See if pod will get evicted after toleration time runs out 246 ginkgo.It("eventually evict pod with finite tolerations from tainted nodes", func(ctx context.Context) { 247 podName := "taint-eviction-3" 248 pod := createPodForTaintsTest(true, kubeletPodDeletionDelaySeconds+2*additionalWaitPerDeleteSeconds, podName, podName, ns) 249 observedDeletions := make(chan string, 100) 250 createTestController(ctx, cs, observedDeletions, podName, ns) 251 252 ginkgo.By("Starting pod...") 253 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 254 framework.ExpectNoError(err) 255 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 256 257 ginkgo.By("Trying to apply a taint on the Node") 258 testTaint := getTestTaint() 259 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 260 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 261 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 262 263 // Wait a bit 264 ginkgo.By("Waiting to see if a Pod won't be deleted") 265 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 266 select { 267 case <-timeoutChannel: 268 framework.Logf("Pod wasn't evicted") 269 case <-observedDeletions: 270 framework.Failf("Pod was evicted despite toleration") 271 return 272 } 273 ginkgo.By("Waiting for Pod to be deleted") 274 timeoutChannel = time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 275 select { 276 case <-timeoutChannel: 277 framework.Failf("Pod wasn't evicted") 278 case <-observedDeletions: 279 framework.Logf("Pod was evicted after toleration time run out. Test successful") 280 return 281 } 282 }) 283 284 /* 285 Release: v1.16 286 Testname: Taint, Pod Eviction on taint removal 287 Description: The Pod with toleration timeout scheduled on a tainted Node MUST not be 288 evicted if the taint is removed before toleration time ends. 
289 */ 290 framework.ConformanceIt("removing taint cancels eviction", f.WithDisruptive(), func(ctx context.Context) { 291 podName := "taint-eviction-4" 292 pod := createPodForTaintsTest(true, 2*additionalWaitPerDeleteSeconds, podName, podName, ns) 293 observedDeletions := make(chan string, 100) 294 createTestController(ctx, cs, observedDeletions, podName, ns) 295 296 // 1. Run a pod with short toleration 297 ginkgo.By("Starting pod...") 298 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 299 framework.ExpectNoError(err) 300 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 301 302 // 2. Taint the node running this pod with a no-execute taint 303 ginkgo.By("Trying to apply a taint on the Node") 304 testTaint := getTestTaint() 305 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 306 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 307 taintRemoved := false 308 ginkgo.DeferCleanup(func(ctx context.Context) { 309 if !taintRemoved { 310 e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint) 311 } 312 }) 313 314 // 3. Wait some time 315 ginkgo.By("Waiting short time to make sure Pod is queued for deletion") 316 timeoutChannel := time.NewTimer(additionalWaitPerDeleteSeconds).C 317 select { 318 case <-timeoutChannel: 319 framework.Logf("Pod wasn't evicted. Proceeding") 320 case <-observedDeletions: 321 framework.Failf("Pod was evicted despite toleration") 322 return 323 } 324 325 // 4. Remove the taint 326 framework.Logf("Removing taint from Node") 327 e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint) 328 taintRemoved = true 329 330 // 5. See if Pod won't be evicted. 331 ginkgo.By("Waiting some time to make sure that toleration time passed.") 332 timeoutChannel = time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+3*additionalWaitPerDeleteSeconds) * time.Second).C 333 select { 334 case <-timeoutChannel: 335 framework.Logf("Pod wasn't evicted. 
Test successful") 336 case <-observedDeletions: 337 framework.Failf("Pod was evicted despite toleration") 338 } 339 }) 340 341 // 1. Run a pod with finalizer 342 // 2. Taint the node running this pod with a no-execute taint 343 // 3. See if pod will get evicted and has the pod disruption condition 344 // 4. Remove the finalizer so that the pod can be deleted by GC 345 ginkgo.It("pods evicted from tainted nodes have pod disruption condition", func(ctx context.Context) { 346 podName := "taint-eviction-pod-disruption" 347 pod := createPodForTaintsTest(false, 0, podName, podName, ns) 348 pod.Finalizers = append(pod.Finalizers, testFinalizer) 349 350 ginkgo.By("Starting pod...") 351 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 352 framework.ExpectNoError(err) 353 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 354 355 ginkgo.DeferCleanup(e2epod.NewPodClient(f).RemoveFinalizer, pod.Name, testFinalizer) 356 357 ginkgo.By("Trying to apply a taint on the Node") 358 testTaint := getTestTaint() 359 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 360 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 361 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 362 363 ginkgo.By("Waiting for Pod to be terminating") 364 timeout := time.Duration(kubeletPodDeletionDelaySeconds+3*additionalWaitPerDeleteSeconds) * time.Second 365 err = e2epod.WaitForPodTerminatingInNamespaceTimeout(ctx, f.ClientSet, pod.Name, pod.Namespace, timeout) 366 framework.ExpectNoError(err) 367 368 ginkgo.By("Verifying the pod has the pod disruption condition") 369 e2epod.VerifyPodHasConditionWithType(ctx, f, pod, v1.DisruptionTarget) 370 }) 371 }) 372 373 var _ = SIGDescribe("NoExecuteTaintManager Multiple Pods", framework.WithSerial(), func() { 374 var cs clientset.Interface 375 var ns string 376 f := framework.NewDefaultFramework("taint-multiple-pods") 377 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 378 379 
ginkgo.BeforeEach(func(ctx context.Context) { 380 cs = f.ClientSet 381 ns = f.Namespace.Name 382 383 e2enode.WaitForTotalHealthy(ctx, cs, time.Minute) 384 385 err := framework.CheckTestingNSDeletedExcept(ctx, cs, ns) 386 framework.ExpectNoError(err) 387 }) 388 389 // 1. Run two pods; one with toleration, one without toleration 390 // 2. Taint the nodes running those pods with a no-execute taint 391 // 3. See if pod-without-toleration get evicted, and pod-with-toleration is kept 392 ginkgo.It("only evicts pods without tolerations from tainted nodes", func(ctx context.Context) { 393 podGroup := "taint-eviction-a" 394 observedDeletions := make(chan string, 100) 395 createTestController(ctx, cs, observedDeletions, podGroup, ns) 396 397 pod1 := createPodForTaintsTest(false, 0, podGroup+"1", podGroup, ns) 398 pod2 := createPodForTaintsTest(true, 0, podGroup+"2", podGroup, ns) 399 400 ginkgo.By("Starting pods...") 401 nodeName1, err := testutils.RunPodAndGetNodeName(ctx, cs, pod1, 2*time.Minute) 402 framework.ExpectNoError(err) 403 framework.Logf("Pod1 is running on %v. Tainting Node", nodeName1) 404 nodeName2, err := testutils.RunPodAndGetNodeName(ctx, cs, pod2, 2*time.Minute) 405 framework.ExpectNoError(err) 406 framework.Logf("Pod2 is running on %v. 
Tainting Node", nodeName2) 407 408 ginkgo.By("Trying to apply a taint on the Nodes") 409 testTaint := getTestTaint() 410 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName1, testTaint) 411 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName1, &testTaint) 412 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName1, testTaint) 413 if nodeName2 != nodeName1 { 414 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName2, testTaint) 415 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName2, &testTaint) 416 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName2, testTaint) 417 } 418 419 // Wait a bit 420 ginkgo.By("Waiting for Pod1 to be deleted") 421 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 422 var evicted int 423 for { 424 select { 425 case <-timeoutChannel: 426 if evicted == 0 { 427 framework.Failf("Failed to evict Pod1.") 428 } else if evicted == 2 { 429 framework.Failf("Pod1 is evicted. But unexpected Pod2 also get evicted.") 430 } 431 return 432 case podName := <-observedDeletions: 433 evicted++ 434 if podName == podGroup+"1" { 435 framework.Logf("Noticed Pod %q gets evicted.", podName) 436 } else if podName == podGroup+"2" { 437 framework.Failf("Unexpected Pod %q gets evicted.", podName) 438 return 439 } 440 } 441 } 442 }) 443 444 /* 445 Release: v1.16 446 Testname: Pod Eviction, Toleration limits 447 Description: In a multi-pods scenario with tolerationSeconds, the pods MUST be evicted as per 448 the toleration time limit. 449 */ 450 framework.ConformanceIt("evicts pods with minTolerationSeconds", f.WithDisruptive(), func(ctx context.Context) { 451 podGroup := "taint-eviction-b" 452 observedDeletions := make(chan string, 100) 453 createTestController(ctx, cs, observedDeletions, podGroup, ns) 454 455 // 1. 
Run two pods both with toleration; one with tolerationSeconds=5, the other with 25 456 pod1 := createPodForTaintsTest(true, additionalWaitPerDeleteSeconds, podGroup+"1", podGroup, ns) 457 pod2 := createPodForTaintsTest(true, 5*additionalWaitPerDeleteSeconds, podGroup+"2", podGroup, ns) 458 459 ginkgo.By("Starting pods...") 460 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod1, 2*time.Minute) 461 framework.ExpectNoError(err) 462 node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 463 framework.ExpectNoError(err) 464 nodeHostNameLabel, ok := node.GetObjectMeta().GetLabels()["kubernetes.io/hostname"] 465 if !ok { 466 framework.Failf("error getting kubernetes.io/hostname label on node %s", nodeName) 467 } 468 framework.ExpectNoError(err) 469 framework.Logf("Pod1 is running on %v. Tainting Node", nodeName) 470 // ensure pod2 lands on the same node as pod1 471 pod2.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": nodeHostNameLabel} 472 _, err = testutils.RunPodAndGetNodeName(ctx, cs, pod2, 2*time.Minute) 473 framework.ExpectNoError(err) 474 // Wait for pods to be running state before eviction happens 475 framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod1)) 476 framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod2)) 477 framework.Logf("Pod2 is running on %v. Tainting Node", nodeName) 478 479 // 2. Taint the nodes running those pods with a no-execute taint 480 ginkgo.By("Trying to apply a taint on the Node") 481 testTaint := getTestTaint() 482 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 483 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 484 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 485 486 // 3. 
Wait to see if both pods get evicted in between [5, 25] seconds 487 ginkgo.By("Waiting for Pod1 and Pod2 to be deleted") 488 // On Windows hosts, we're noticing that the pods are taking more time to get deleted, so having larger timeout 489 // is good 490 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+10*additionalWaitPerDeleteSeconds) * time.Second).C 491 var evicted int 492 for evicted != 2 { 493 select { 494 case <-timeoutChannel: 495 framework.Failf("Failed to evict all Pods. %d pod(s) is not evicted.", 2-evicted) 496 return 497 case podName := <-observedDeletions: 498 framework.Logf("Noticed Pod %q gets evicted.", podName) 499 evicted++ 500 } 501 } 502 }) 503 })