/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/kubernetes/test/e2e/framework"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	testutils "k8s.io/kubernetes/test/utils"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	// ensure libs have a chance to initialize
	_ "github.com/stretchr/testify/assert"
)

const (
	// testFinalizer keeps an evicted pod around in the API long enough for the
	// test to observe its terminating state and disruption condition.
	testFinalizer = "example.com/test-finalizer"
)

// getTestTaint returns the NoExecute taint shared by every test in this file,
// stamped with the current time in TimeAdded.
func getTestTaint() v1.Taint {
	now := metav1.Now()
	return v1.Taint{
		Key:       "kubernetes.io/e2e-evict-taint-key",
		Value:     "evictTaintVal",
		Effect:    v1.TaintEffectNoExecute,
		TimeAdded: &now,
	}
}

// Create a default pod for this test, with argument saying if the Pod should have
// toleration for Taints used in this test.
58 func createPodForTaintsTest(hasToleration bool, tolerationSeconds int, podName, podLabel, ns string) *v1.Pod { 59 grace := int64(1) 60 if !hasToleration { 61 return &v1.Pod{ 62 ObjectMeta: metav1.ObjectMeta{ 63 Name: podName, 64 Namespace: ns, 65 Labels: map[string]string{"group": podLabel}, 66 DeletionGracePeriodSeconds: &grace, 67 }, 68 Spec: v1.PodSpec{ 69 Containers: []v1.Container{ 70 { 71 Name: "pause", 72 Image: imageutils.GetE2EImage(imageutils.Pause), 73 }, 74 }, 75 }, 76 } 77 } 78 if tolerationSeconds <= 0 { 79 return &v1.Pod{ 80 ObjectMeta: metav1.ObjectMeta{ 81 Name: podName, 82 Namespace: ns, 83 Labels: map[string]string{"group": podLabel}, 84 DeletionGracePeriodSeconds: &grace, 85 // default - tolerate forever 86 }, 87 Spec: v1.PodSpec{ 88 Containers: []v1.Container{ 89 { 90 Name: "pause", 91 Image: imageutils.GetE2EImage(imageutils.Pause), 92 }, 93 }, 94 Tolerations: []v1.Toleration{{Key: "kubernetes.io/e2e-evict-taint-key", Value: "evictTaintVal", Effect: v1.TaintEffectNoExecute}}, 95 }, 96 } 97 } 98 ts := int64(tolerationSeconds) 99 return &v1.Pod{ 100 ObjectMeta: metav1.ObjectMeta{ 101 Name: podName, 102 Namespace: ns, 103 Labels: map[string]string{"group": podLabel}, 104 DeletionGracePeriodSeconds: &grace, 105 }, 106 Spec: v1.PodSpec{ 107 Containers: []v1.Container{ 108 { 109 Name: "pause", 110 Image: imageutils.GetE2EImage(imageutils.Pause), 111 }, 112 }, 113 // default - tolerate forever 114 Tolerations: []v1.Toleration{{Key: "kubernetes.io/e2e-evict-taint-key", Value: "evictTaintVal", Effect: v1.TaintEffectNoExecute, TolerationSeconds: &ts}}, 115 }, 116 } 117 } 118 119 // Creates and starts a controller (informer) that watches updates on a pod in given namespace with given name. It puts a new 120 // struct into observedDeletion channel for every deletion it sees. 
121 func createTestController(ctx context.Context, cs clientset.Interface, observedDeletions chan string, podLabel, ns string) { 122 _, controller := cache.NewInformer( 123 &cache.ListWatch{ 124 ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 125 options.LabelSelector = labels.SelectorFromSet(labels.Set{"group": podLabel}).String() 126 obj, err := cs.CoreV1().Pods(ns).List(ctx, options) 127 return runtime.Object(obj), err 128 }, 129 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 130 options.LabelSelector = labels.SelectorFromSet(labels.Set{"group": podLabel}).String() 131 return cs.CoreV1().Pods(ns).Watch(ctx, options) 132 }, 133 }, 134 &v1.Pod{}, 135 0, 136 cache.ResourceEventHandlerFuncs{ 137 DeleteFunc: func(oldObj interface{}) { 138 if delPod, ok := oldObj.(*v1.Pod); ok { 139 observedDeletions <- delPod.Name 140 } else { 141 observedDeletions <- "" 142 } 143 }, 144 }, 145 ) 146 framework.Logf("Starting informer...") 147 go controller.Run(ctx.Done()) 148 } 149 150 const ( 151 kubeletPodDeletionDelaySeconds = 60 152 additionalWaitPerDeleteSeconds = 5 153 ) 154 155 // Tests the behavior of NoExecuteTaintManager. Following scenarios are included: 156 // - eviction of non-tolerating pods from a tainted node, 157 // - lack of eviction of tolerating pods from a tainted node, 158 // - delayed eviction of short-tolerating pod from a tainted node, 159 // - lack of eviction of short-tolerating pod after taint removal. 
160 var _ = SIGDescribe("NoExecuteTaintManager Single Pod", framework.WithSerial(), func() { 161 var cs clientset.Interface 162 var ns string 163 f := framework.NewDefaultFramework("taint-single-pod") 164 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 165 166 ginkgo.BeforeEach(func(ctx context.Context) { 167 cs = f.ClientSet 168 ns = f.Namespace.Name 169 170 e2enode.WaitForTotalHealthy(ctx, cs, time.Minute) 171 172 err := framework.CheckTestingNSDeletedExcept(ctx, cs, ns) 173 framework.ExpectNoError(err) 174 }) 175 176 // 1. Run a pod 177 // 2. Taint the node running this pod with a no-execute taint 178 // 3. See if pod will get evicted 179 ginkgo.It("evicts pods from tainted nodes", func(ctx context.Context) { 180 podName := "taint-eviction-1" 181 pod := createPodForTaintsTest(false, 0, podName, podName, ns) 182 observedDeletions := make(chan string, 100) 183 createTestController(ctx, cs, observedDeletions, podName, ns) 184 185 ginkgo.By("Starting pod...") 186 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 187 framework.ExpectNoError(err) 188 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 189 190 ginkgo.By("Trying to apply a taint on the Node") 191 testTaint := getTestTaint() 192 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 193 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 194 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 195 196 // Wait a bit 197 ginkgo.By("Waiting for Pod to be deleted") 198 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 199 select { 200 case <-timeoutChannel: 201 framework.Failf("Failed to evict Pod") 202 case <-observedDeletions: 203 framework.Logf("Noticed Pod eviction. Test successful") 204 } 205 }) 206 207 // 1. Run a pod with toleration 208 // 2. Taint the node running this pod with a no-execute taint 209 // 3. 
See if pod won't get evicted 210 ginkgo.It("doesn't evict pod with tolerations from tainted nodes", func(ctx context.Context) { 211 podName := "taint-eviction-2" 212 pod := createPodForTaintsTest(true, 0, podName, podName, ns) 213 observedDeletions := make(chan string, 100) 214 createTestController(ctx, cs, observedDeletions, podName, ns) 215 216 ginkgo.By("Starting pod...") 217 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 218 framework.ExpectNoError(err) 219 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 220 221 ginkgo.By("Trying to apply a taint on the Node") 222 testTaint := getTestTaint() 223 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 224 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 225 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 226 227 // Wait a bit 228 ginkgo.By("Waiting for Pod to be deleted") 229 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 230 select { 231 case <-timeoutChannel: 232 framework.Logf("Pod wasn't evicted. Test successful") 233 case <-observedDeletions: 234 framework.Failf("Pod was evicted despite toleration") 235 } 236 }) 237 238 // 1. Run a pod with a finite toleration 239 // 2. Taint the node running this pod with a no-execute taint 240 // 3. See if pod won't get evicted before toleration time runs out 241 // 4. 
See if pod will get evicted after toleration time runs out 242 ginkgo.It("eventually evict pod with finite tolerations from tainted nodes", func(ctx context.Context) { 243 podName := "taint-eviction-3" 244 pod := createPodForTaintsTest(true, kubeletPodDeletionDelaySeconds+2*additionalWaitPerDeleteSeconds, podName, podName, ns) 245 observedDeletions := make(chan string, 100) 246 createTestController(ctx, cs, observedDeletions, podName, ns) 247 248 ginkgo.By("Starting pod...") 249 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 250 framework.ExpectNoError(err) 251 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 252 253 ginkgo.By("Trying to apply a taint on the Node") 254 testTaint := getTestTaint() 255 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 256 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 257 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 258 259 // Wait a bit 260 ginkgo.By("Waiting to see if a Pod won't be deleted") 261 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 262 select { 263 case <-timeoutChannel: 264 framework.Logf("Pod wasn't evicted") 265 case <-observedDeletions: 266 framework.Failf("Pod was evicted despite toleration") 267 return 268 } 269 ginkgo.By("Waiting for Pod to be deleted") 270 timeoutChannel = time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 271 select { 272 case <-timeoutChannel: 273 framework.Failf("Pod wasn't evicted") 274 case <-observedDeletions: 275 framework.Logf("Pod was evicted after toleration time run out. Test successful") 276 return 277 } 278 }) 279 280 /* 281 Release: v1.16 282 Testname: Taint, Pod Eviction on taint removal 283 Description: The Pod with toleration timeout scheduled on a tainted Node MUST not be 284 evicted if the taint is removed before toleration time ends. 
285 */ 286 framework.ConformanceIt("removing taint cancels eviction", f.WithDisruptive(), func(ctx context.Context) { 287 podName := "taint-eviction-4" 288 pod := createPodForTaintsTest(true, 2*additionalWaitPerDeleteSeconds, podName, podName, ns) 289 observedDeletions := make(chan string, 100) 290 createTestController(ctx, cs, observedDeletions, podName, ns) 291 292 // 1. Run a pod with short toleration 293 ginkgo.By("Starting pod...") 294 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 295 framework.ExpectNoError(err) 296 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 297 298 // 2. Taint the node running this pod with a no-execute taint 299 ginkgo.By("Trying to apply a taint on the Node") 300 testTaint := getTestTaint() 301 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 302 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 303 taintRemoved := false 304 ginkgo.DeferCleanup(func(ctx context.Context) { 305 if !taintRemoved { 306 e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint) 307 } 308 }) 309 310 // 3. Wait some time 311 ginkgo.By("Waiting short time to make sure Pod is queued for deletion") 312 timeoutChannel := time.NewTimer(additionalWaitPerDeleteSeconds).C 313 select { 314 case <-timeoutChannel: 315 framework.Logf("Pod wasn't evicted. Proceeding") 316 case <-observedDeletions: 317 framework.Failf("Pod was evicted despite toleration") 318 return 319 } 320 321 // 4. Remove the taint 322 framework.Logf("Removing taint from Node") 323 e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint) 324 taintRemoved = true 325 326 // 5. See if Pod won't be evicted. 327 ginkgo.By("Waiting some time to make sure that toleration time passed.") 328 timeoutChannel = time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+3*additionalWaitPerDeleteSeconds) * time.Second).C 329 select { 330 case <-timeoutChannel: 331 framework.Logf("Pod wasn't evicted. 
Test successful") 332 case <-observedDeletions: 333 framework.Failf("Pod was evicted despite toleration") 334 } 335 }) 336 337 // 1. Run a pod with finalizer 338 // 2. Taint the node running this pod with a no-execute taint 339 // 3. See if pod will get evicted and has the pod disruption condition 340 // 4. Remove the finalizer so that the pod can be deleted by GC 341 ginkgo.It("pods evicted from tainted nodes have pod disruption condition", func(ctx context.Context) { 342 podName := "taint-eviction-pod-disruption" 343 pod := createPodForTaintsTest(false, 0, podName, podName, ns) 344 pod.Finalizers = append(pod.Finalizers, testFinalizer) 345 346 ginkgo.By("Starting pod...") 347 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod, 2*time.Minute) 348 framework.ExpectNoError(err) 349 framework.Logf("Pod is running on %v. Tainting Node", nodeName) 350 351 ginkgo.DeferCleanup(e2epod.NewPodClient(f).RemoveFinalizer, pod.Name, testFinalizer) 352 353 ginkgo.By("Trying to apply a taint on the Node") 354 testTaint := getTestTaint() 355 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 356 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 357 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 358 359 ginkgo.By("Waiting for Pod to be terminating") 360 timeout := time.Duration(kubeletPodDeletionDelaySeconds+3*additionalWaitPerDeleteSeconds) * time.Second 361 err = e2epod.WaitForPodTerminatingInNamespaceTimeout(ctx, f.ClientSet, pod.Name, pod.Namespace, timeout) 362 framework.ExpectNoError(err) 363 364 ginkgo.By("Verifying the pod has the pod disruption condition") 365 e2epod.VerifyPodHasConditionWithType(ctx, f, pod, v1.DisruptionTarget) 366 }) 367 }) 368 369 var _ = SIGDescribe("NoExecuteTaintManager Multiple Pods", framework.WithSerial(), func() { 370 var cs clientset.Interface 371 var ns string 372 f := framework.NewDefaultFramework("taint-multiple-pods") 373 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 374 375 
ginkgo.BeforeEach(func(ctx context.Context) { 376 cs = f.ClientSet 377 ns = f.Namespace.Name 378 379 e2enode.WaitForTotalHealthy(ctx, cs, time.Minute) 380 381 err := framework.CheckTestingNSDeletedExcept(ctx, cs, ns) 382 framework.ExpectNoError(err) 383 }) 384 385 // 1. Run two pods; one with toleration, one without toleration 386 // 2. Taint the nodes running those pods with a no-execute taint 387 // 3. See if pod-without-toleration get evicted, and pod-with-toleration is kept 388 ginkgo.It("only evicts pods without tolerations from tainted nodes", func(ctx context.Context) { 389 podGroup := "taint-eviction-a" 390 observedDeletions := make(chan string, 100) 391 createTestController(ctx, cs, observedDeletions, podGroup, ns) 392 393 pod1 := createPodForTaintsTest(false, 0, podGroup+"1", podGroup, ns) 394 pod2 := createPodForTaintsTest(true, 0, podGroup+"2", podGroup, ns) 395 396 ginkgo.By("Starting pods...") 397 nodeName1, err := testutils.RunPodAndGetNodeName(ctx, cs, pod1, 2*time.Minute) 398 framework.ExpectNoError(err) 399 framework.Logf("Pod1 is running on %v. Tainting Node", nodeName1) 400 nodeName2, err := testutils.RunPodAndGetNodeName(ctx, cs, pod2, 2*time.Minute) 401 framework.ExpectNoError(err) 402 framework.Logf("Pod2 is running on %v. 
Tainting Node", nodeName2) 403 404 ginkgo.By("Trying to apply a taint on the Nodes") 405 testTaint := getTestTaint() 406 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName1, testTaint) 407 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName1, &testTaint) 408 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName1, testTaint) 409 if nodeName2 != nodeName1 { 410 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName2, testTaint) 411 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName2, &testTaint) 412 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName2, testTaint) 413 } 414 415 // Wait a bit 416 ginkgo.By("Waiting for Pod1 to be deleted") 417 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+additionalWaitPerDeleteSeconds) * time.Second).C 418 var evicted int 419 for { 420 select { 421 case <-timeoutChannel: 422 if evicted == 0 { 423 framework.Failf("Failed to evict Pod1.") 424 } else if evicted == 2 { 425 framework.Failf("Pod1 is evicted. But unexpected Pod2 also get evicted.") 426 } 427 return 428 case podName := <-observedDeletions: 429 evicted++ 430 if podName == podGroup+"1" { 431 framework.Logf("Noticed Pod %q gets evicted.", podName) 432 } else if podName == podGroup+"2" { 433 framework.Failf("Unexpected Pod %q gets evicted.", podName) 434 return 435 } 436 } 437 } 438 }) 439 440 /* 441 Release: v1.16 442 Testname: Pod Eviction, Toleration limits 443 Description: In a multi-pods scenario with tolerationSeconds, the pods MUST be evicted as per 444 the toleration time limit. 445 */ 446 framework.ConformanceIt("evicts pods with minTolerationSeconds", f.WithDisruptive(), func(ctx context.Context) { 447 podGroup := "taint-eviction-b" 448 observedDeletions := make(chan string, 100) 449 createTestController(ctx, cs, observedDeletions, podGroup, ns) 450 451 // 1. 
Run two pods both with toleration; one with tolerationSeconds=5, the other with 25 452 pod1 := createPodForTaintsTest(true, additionalWaitPerDeleteSeconds, podGroup+"1", podGroup, ns) 453 pod2 := createPodForTaintsTest(true, 5*additionalWaitPerDeleteSeconds, podGroup+"2", podGroup, ns) 454 455 ginkgo.By("Starting pods...") 456 nodeName, err := testutils.RunPodAndGetNodeName(ctx, cs, pod1, 2*time.Minute) 457 framework.ExpectNoError(err) 458 node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 459 framework.ExpectNoError(err) 460 nodeHostNameLabel, ok := node.GetObjectMeta().GetLabels()["kubernetes.io/hostname"] 461 if !ok { 462 framework.Failf("error getting kubernetes.io/hostname label on node %s", nodeName) 463 } 464 framework.ExpectNoError(err) 465 framework.Logf("Pod1 is running on %v. Tainting Node", nodeName) 466 // ensure pod2 lands on the same node as pod1 467 pod2.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": nodeHostNameLabel} 468 _, err = testutils.RunPodAndGetNodeName(ctx, cs, pod2, 2*time.Minute) 469 framework.ExpectNoError(err) 470 // Wait for pods to be running state before eviction happens 471 framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod1)) 472 framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod2)) 473 framework.Logf("Pod2 is running on %v. Tainting Node", nodeName) 474 475 // 2. Taint the nodes running those pods with a no-execute taint 476 ginkgo.By("Trying to apply a taint on the Node") 477 testTaint := getTestTaint() 478 e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint) 479 e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint) 480 ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint) 481 482 // 3. 
Wait to see if both pods get evicted in between [5, 25] seconds 483 ginkgo.By("Waiting for Pod1 and Pod2 to be deleted") 484 // On Windows hosts, we're noticing that the pods are taking more time to get deleted, so having larger timeout 485 // is good 486 timeoutChannel := time.NewTimer(time.Duration(kubeletPodDeletionDelaySeconds+10*additionalWaitPerDeleteSeconds) * time.Second).C 487 var evicted int 488 for evicted != 2 { 489 select { 490 case <-timeoutChannel: 491 framework.Failf("Failed to evict all Pods. %d pod(s) is not evicted.", 2-evicted) 492 return 493 case podName := <-observedDeletions: 494 framework.Logf("Noticed Pod %q gets evicted.", podName) 495 evicted++ 496 } 497 } 498 }) 499 })