k8s.io/kubernetes@v1.29.3/test/integration/node/lifecycle_test.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package node 18 19 import ( 20 "fmt" 21 "testing" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/apimachinery/pkg/api/resource" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/util/wait" 28 "k8s.io/apiserver/pkg/admission" 29 "k8s.io/apiserver/pkg/util/feature" 30 "k8s.io/client-go/informers" 31 clientset "k8s.io/client-go/kubernetes" 32 restclient "k8s.io/client-go/rest" 33 featuregatetesting "k8s.io/component-base/featuregate/testing" 34 "k8s.io/klog/v2" 35 "k8s.io/kubernetes/cmd/kube-controller-manager/names" 36 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 37 "k8s.io/kubernetes/pkg/controller/nodelifecycle" 38 "k8s.io/kubernetes/pkg/controller/tainteviction" 39 "k8s.io/kubernetes/pkg/features" 40 "k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds" 41 "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction" 42 pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction" 43 testutils "k8s.io/kubernetes/test/integration/util" 44 imageutils "k8s.io/kubernetes/test/utils/image" 45 ) 46 47 // TestEvictionForNoExecuteTaintAddedByUser tests taint-based eviction for a node tainted NoExecute 48 func TestEvictionForNoExecuteTaintAddedByUser(t *testing.T) { 49 // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode 50 nodeCount := 3 51 nodeIndex := 1 // the exact node doesn't matter, pick one 52 53 tests := map[string]struct { 54 enablePodDisruptionConditions bool 55 enableSeparateTaintEvictionController bool 56 startStandaloneTaintEvictionController bool 57 wantPodEvicted bool 58 }{ 59 "Test eviction for NoExecute taint added by user; pod condition added when PodDisruptionConditions enabled; separate taint eviction controller disabled": { 60 enablePodDisruptionConditions: true, 61 enableSeparateTaintEvictionController: false, 62 startStandaloneTaintEvictionController: false, 63 wantPodEvicted: true, 64 }, 65 "Test eviction for NoExecute taint added by user; no pod condition added when PodDisruptionConditions disabled; separate taint eviction controller disabled": { 66 enablePodDisruptionConditions: false, 67 enableSeparateTaintEvictionController: false, 68 startStandaloneTaintEvictionController: false, 69 wantPodEvicted: true, 70 }, 71 "Test eviction for NoExecute taint added by user; separate taint eviction controller enabled but not started": { 72 enablePodDisruptionConditions: false, 73 enableSeparateTaintEvictionController: true, 74 startStandaloneTaintEvictionController: false, 75 wantPodEvicted: false, 76 }, 77 "Test eviction for NoExecute taint added by user; separate taint eviction controller enabled and started": { 78 enablePodDisruptionConditions: false, 79 enableSeparateTaintEvictionController: true, 80 startStandaloneTaintEvictionController: true, 81 wantPodEvicted: true, 82 }, 83 } 84 85 for name, test := range tests { 86 t.Run(name, func(t *testing.T) { 87 var nodes []*v1.Node 88 for i := 0; i < nodeCount; i++ { 89 node := &v1.Node{ 90 ObjectMeta: metav1.ObjectMeta{ 91 Name: fmt.Sprintf("testnode-%d", i), 92 Labels: map[string]string{"node.kubernetes.io/exclude-disruption": "true"}, 93 }, 94 Spec: v1.NodeSpec{}, 95 Status: v1.NodeStatus{ 96 Conditions: []v1.NodeCondition{ 97 { 98 Type: v1.NodeReady, 99 Status: v1.ConditionTrue, 100 }, 101 }, 102 }, 103 } 104 nodes = append(nodes, node) 105 } 106 testPod := &v1.Pod{ 107 ObjectMeta: metav1.ObjectMeta{ 108 Name: "testpod", 109 }, 110 Spec: v1.PodSpec{ 111 NodeName: nodes[nodeIndex].Name, 112 Containers: []v1.Container{ 113 {Name: "container", Image: imageutils.GetPauseImageName()}, 114 }, 115 }, 116 Status: v1.PodStatus{ 117 Phase: v1.PodRunning, 118 Conditions: []v1.PodCondition{ 119 { 120 Type: v1.PodReady, 121 Status: v1.ConditionTrue, 122 }, 123 }, 124 }, 125 } 126 127 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, test.enablePodDisruptionConditions)() 128 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SeparateTaintEvictionController, test.enableSeparateTaintEvictionController)() 129 testCtx := testutils.InitTestAPIServer(t, "taint-no-execute", nil) 130 cs := testCtx.ClientSet 131 132 // Build clientset and informers for controllers. 133 externalClientConfig := restclient.CopyConfig(testCtx.KubeConfig) 134 externalClientConfig.QPS = -1 135 externalClientset := clientset.NewForConfigOrDie(externalClientConfig) 136 externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second) 137 138 // Start NodeLifecycleController for taint. 139 nc, err := nodelifecycle.NewNodeLifecycleController( 140 testCtx.Ctx, 141 externalInformers.Coordination().V1().Leases(), 142 externalInformers.Core().V1().Pods(), 143 externalInformers.Core().V1().Nodes(), 144 externalInformers.Apps().V1().DaemonSets(), 145 cs, 146 1*time.Second, // Node monitor grace period 147 time.Minute, // Node startup grace period 148 time.Millisecond, // Node monitor period 149 100, // Eviction limiter QPS 150 100, // Secondary eviction limiter QPS 151 50, // Large cluster threshold 152 0.55, // Unhealthy zone threshold 153 ) 154 if err != nil { 155 t.Fatalf("Failed to create node controller: %v", err) 156 } 157 158 // Waiting for all controllers to sync 159 externalInformers.Start(testCtx.Ctx.Done()) 160 externalInformers.WaitForCacheSync(testCtx.Ctx.Done()) 161 162 // Run all controllers 163 go nc.Run(testCtx.Ctx) 164 165 // Start TaintManager 166 if test.startStandaloneTaintEvictionController { 167 tm, _ := tainteviction.New( 168 testCtx.Ctx, 169 testCtx.ClientSet, 170 externalInformers.Core().V1().Pods(), 171 externalInformers.Core().V1().Nodes(), 172 names.TaintEvictionController, 173 ) 174 go tm.Run(testCtx.Ctx) 175 } 176 177 for index := range nodes { 178 nodes[index], err = cs.CoreV1().Nodes().Create(testCtx.Ctx, nodes[index], metav1.CreateOptions{}) 179 if err != nil { 180 t.Fatalf("Failed to create node, err: %v", err) 181 } 182 } 183 184 testPod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, testPod, metav1.CreateOptions{}) 185 if err != nil { 186 t.Fatalf("Test Failed: error: %v, while creating pod", err) 187 } 188 189 if err := testutils.AddTaintToNode(cs, nodes[nodeIndex].Name, v1.Taint{Key: "CustomTaintByUser", Effect: v1.TaintEffectNoExecute}); err != nil { 190 t.Errorf("Failed to taint node in test %s <%s>, err: %v", name, nodes[nodeIndex].Name, err) 191 } 192 193 err = wait.PollUntilContextTimeout(testCtx.Ctx, time.Second, time.Second*20, true, testutils.PodIsGettingEvicted(cs, testPod.Namespace, testPod.Name)) 194 if err != nil && test.wantPodEvicted { 195 t.Fatalf("Test Failed: error %v while waiting for pod %q to be evicted", err, klog.KObj(testPod)) 196 } else if !wait.Interrupted(err) && !test.wantPodEvicted { 197 t.Fatalf("Test Failed: unexpected eviction of pod %q", klog.KObj(testPod)) 198 } 199 200 testPod, err = cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, testPod.Name, metav1.GetOptions{}) 201 if err != nil { 202 t.Fatalf("Test Failed: error: %q, while getting updated pod", err) 203 } 204 _, cond := podutil.GetPodCondition(&testPod.Status, v1.DisruptionTarget) 205 if test.enablePodDisruptionConditions && cond == nil { 206 t.Errorf("Pod %q does not have the expected condition: %q", klog.KObj(testPod), v1.DisruptionTarget) 207 } else if !test.enablePodDisruptionConditions && cond != nil { 208 t.Errorf("Pod %q has an unexpected condition: %q", klog.KObj(testPod), v1.DisruptionTarget) 209 } 210 }) 211 } 212 } 213 214 // TestTaintBasedEvictions tests related cases for the TaintBasedEvictions feature 215 func TestTaintBasedEvictions(t *testing.T) { 216 // we need at least 2 nodes to prevent lifecycle manager from entering "fully-disrupted" mode 217 nodeCount := 3 218 nodeIndex := 1 // the exact node doesn't matter, pick one 219 zero := int64(0) 220 gracePeriod := int64(1) 221 testPod := &v1.Pod{ 222 ObjectMeta: metav1.ObjectMeta{Name: "testpod1", DeletionGracePeriodSeconds: &zero}, 223 Spec: v1.PodSpec{ 224 Containers: []v1.Container{ 225 {Name: "container", Image: imageutils.GetPauseImageName()}, 226 }, 227 Tolerations: []v1.Toleration{ 228 { 229 Key: v1.TaintNodeNotReady, 230 Operator: v1.TolerationOpExists, 231 Effect: v1.TaintEffectNoExecute, 232 }, 233 }, 234 TerminationGracePeriodSeconds: &gracePeriod, 235 }, 236 } 237 tests := []struct { 238 name string 239 nodeTaints []v1.Taint 240 nodeConditions []v1.NodeCondition 241 pod *v1.Pod 242 tolerationSeconds int64 243 expectedWaitForPodCondition string 244 enableSeparateTaintEvictionController bool 245 }{ 246 { 247 name: "Taint based evictions for NodeNotReady and 200 tolerationseconds; separate taint eviction controller disabled", 248 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 249 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 250 pod: testPod.DeepCopy(), 251 tolerationSeconds: 200, 252 expectedWaitForPodCondition: "updated with tolerationSeconds of 200", 253 enableSeparateTaintEvictionController: false, 254 }, 255 { 256 name: "Taint based evictions for NodeNotReady and 200 tolerationseconds; separate taint eviction controller enabled", 257 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 258 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 259 pod: testPod.DeepCopy(), 260 tolerationSeconds: 200, 261 expectedWaitForPodCondition: "updated with tolerationSeconds of 200", 262 enableSeparateTaintEvictionController: true, 263 }, 264 { 265 name: "Taint based evictions for NodeNotReady with no pod tolerations; separate taint eviction controller disabled", 266 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 267 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 268 pod: &v1.Pod{ 269 ObjectMeta: metav1.ObjectMeta{Name: "testpod1"}, 270 Spec: v1.PodSpec{ 271 Containers: []v1.Container{ 272 {Name: "container", Image: imageutils.GetPauseImageName()}, 273 }, 274 }, 275 }, 276 tolerationSeconds: 300, 277 expectedWaitForPodCondition: "updated with tolerationSeconds=300", 278 enableSeparateTaintEvictionController: false, 279 }, 280 { 281 name: "Taint based evictions for NodeNotReady with no pod tolerations; separate taint eviction controller enabled", 282 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 283 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 284 pod: &v1.Pod{ 285 ObjectMeta: metav1.ObjectMeta{Name: "testpod1"}, 286 Spec: v1.PodSpec{ 287 Containers: []v1.Container{ 288 {Name: "container", Image: imageutils.GetPauseImageName()}, 289 }, 290 }, 291 }, 292 tolerationSeconds: 300, 293 expectedWaitForPodCondition: "updated with tolerationSeconds=300", 294 enableSeparateTaintEvictionController: true, 295 }, 296 { 297 name: "Taint based evictions for NodeNotReady and 0 tolerationseconds; separate taint eviction controller disabled", 298 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 299 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 300 pod: testPod.DeepCopy(), 301 tolerationSeconds: 0, 302 expectedWaitForPodCondition: "terminating", 303 enableSeparateTaintEvictionController: false, 304 }, 305 { 306 name: "Taint based evictions for NodeNotReady and 0 tolerationseconds; separate taint eviction controller enabled", 307 nodeTaints: []v1.Taint{{Key: v1.TaintNodeNotReady, Effect: v1.TaintEffectNoExecute}}, 308 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}, 309 pod: testPod.DeepCopy(), 310 tolerationSeconds: 0, 311 expectedWaitForPodCondition: "terminating", 312 enableSeparateTaintEvictionController: true, 313 }, 314 { 315 name: "Taint based evictions for NodeUnreachable; separate taint eviction controller disabled", 316 nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}}, 317 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}}, 318 enableSeparateTaintEvictionController: false, 319 }, 320 { 321 name: "Taint based evictions for NodeUnreachable; separate taint eviction controller enabled", 322 nodeTaints: []v1.Taint{{Key: v1.TaintNodeUnreachable, Effect: v1.TaintEffectNoExecute}}, 323 nodeConditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionUnknown}}, 324 enableSeparateTaintEvictionController: true, 325 }, 326 } 327 328 // Build admission chain handler. 329 podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{}) 330 admission := admission.NewChainHandler( 331 podTolerations, 332 defaulttolerationseconds.NewDefaultTolerationSeconds(), 333 ) 334 for _, test := range tests { 335 t.Run(test.name, func(t *testing.T) { 336 defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.SeparateTaintEvictionController, test.enableSeparateTaintEvictionController)() 337 338 testCtx := testutils.InitTestAPIServer(t, "taint-based-evictions", admission) 339 340 // Build clientset and informers for controllers. 341 externalClientConfig := restclient.CopyConfig(testCtx.KubeConfig) 342 externalClientConfig.QPS = -1 343 externalClientset := clientset.NewForConfigOrDie(externalClientConfig) 344 externalInformers := informers.NewSharedInformerFactory(externalClientset, time.Second) 345 podTolerations.SetExternalKubeClientSet(externalClientset) 346 podTolerations.SetExternalKubeInformerFactory(externalInformers) 347 348 cs := testCtx.ClientSet 349 350 // Start NodeLifecycleController for taint. 351 nc, err := nodelifecycle.NewNodeLifecycleController( 352 testCtx.Ctx, 353 externalInformers.Coordination().V1().Leases(), 354 externalInformers.Core().V1().Pods(), 355 externalInformers.Core().V1().Nodes(), 356 externalInformers.Apps().V1().DaemonSets(), 357 cs, 358 1*time.Second, // Node monitor grace period 359 time.Minute, // Node startup grace period 360 time.Millisecond, // Node monitor period 361 100, // Eviction limiter QPS 362 100, // Secondary eviction limiter QPS 363 50, // Large cluster threshold 364 0.55, // Unhealthy zone threshold 365 ) 366 if err != nil { 367 t.Fatalf("Failed to create node controller: %v", err) 368 } 369 370 // Waiting for all controllers to sync 371 externalInformers.Start(testCtx.Ctx.Done()) 372 externalInformers.WaitForCacheSync(testCtx.Ctx.Done()) 373 374 // Run the controller 375 go nc.Run(testCtx.Ctx) 376 377 // Start TaintEvictionController 378 if test.enableSeparateTaintEvictionController { 379 tm, _ := tainteviction.New( 380 testCtx.Ctx, 381 testCtx.ClientSet, 382 externalInformers.Core().V1().Pods(), 383 externalInformers.Core().V1().Nodes(), 384 names.TaintEvictionController, 385 ) 386 go tm.Run(testCtx.Ctx) 387 } 388 389 nodeRes := v1.ResourceList{ 390 v1.ResourceCPU: resource.MustParse("4000m"), 391 v1.ResourceMemory: resource.MustParse("16Gi"), 392 v1.ResourcePods: resource.MustParse("110"), 393 } 394 395 var nodes []*v1.Node 396 for i := 0; i < nodeCount; i++ { 397 node := &v1.Node{ 398 ObjectMeta: metav1.ObjectMeta{ 399 Name: fmt.Sprintf("node-%d", i), 400 Labels: map[string]string{ 401 v1.LabelTopologyRegion: "region1", 402 v1.LabelTopologyZone: "zone1", 403 "node.kubernetes.io/exclude-disruption": "true", 404 }, 405 }, 406 Spec: v1.NodeSpec{}, 407 Status: v1.NodeStatus{ 408 Capacity: nodeRes, 409 Allocatable: nodeRes, 410 }, 411 } 412 if i == nodeIndex { 413 node.Status.Conditions = append(node.Status.Conditions, test.nodeConditions...) 414 } else { 415 node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ 416 Type: v1.NodeReady, 417 Status: v1.ConditionTrue, 418 }) 419 } 420 nodes = append(nodes, node) 421 if _, err := cs.CoreV1().Nodes().Create(testCtx.Ctx, node, metav1.CreateOptions{}); err != nil { 422 t.Fatalf("Failed to create node: %q, err: %v", klog.KObj(node), err) 423 } 424 } 425 426 if test.pod != nil { 427 test.pod.Spec.NodeName = nodes[nodeIndex].Name 428 test.pod.Name = "testpod" 429 if len(test.pod.Spec.Tolerations) > 0 { 430 test.pod.Spec.Tolerations[0].TolerationSeconds = &test.tolerationSeconds 431 } 432 433 test.pod, err = cs.CoreV1().Pods(testCtx.NS.Name).Create(testCtx.Ctx, test.pod, metav1.CreateOptions{}) 434 if err != nil { 435 t.Fatalf("Test Failed: error: %q, while creating pod %q", err, klog.KObj(test.pod)) 436 } 437 } 438 439 if err := testutils.WaitForNodeTaints(cs, nodes[nodeIndex], test.nodeTaints); err != nil { 440 t.Errorf("Failed to taint node %q, err: %v", klog.KObj(nodes[nodeIndex]), err) 441 } 442 443 if test.pod != nil { 444 err = wait.PollImmediate(time.Second, time.Second*15, func() (bool, error) { 445 pod, err := cs.CoreV1().Pods(test.pod.Namespace).Get(testCtx.Ctx, test.pod.Name, metav1.GetOptions{}) 446 if err != nil { 447 return false, err 448 } 449 // as node is unreachable, pod0 is expected to be in Terminating status 450 // rather than getting deleted 451 if test.tolerationSeconds == 0 { 452 return pod.DeletionTimestamp != nil, nil 453 } 454 if seconds, err := testutils.GetTolerationSeconds(pod.Spec.Tolerations); err == nil { 455 return seconds == test.tolerationSeconds, nil 456 } 457 return false, nil 458 }) 459 if err != nil { 460 pod, _ := cs.CoreV1().Pods(testCtx.NS.Name).Get(testCtx.Ctx, test.pod.Name, metav1.GetOptions{}) 461 t.Fatalf("Error: %v, Expected test pod to be %s but it's %v", err, test.expectedWaitForPodCondition, pod) 462 } 463 testutils.CleanupPods(testCtx.Ctx, cs, t, []*v1.Pod{test.pod}) 464 } 465 testutils.CleanupNodes(cs, t) 466 }) 467 } 468 }