github.com/cilium/cilium@v1.16.2/operator/watchers/node_taint_test.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package watchers 5 6 import ( 7 "encoding/json" 8 "testing" 9 "time" 10 11 "github.com/stretchr/testify/require" 12 corev1 "k8s.io/api/core/v1" 13 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 "k8s.io/apimachinery/pkg/runtime" 15 "k8s.io/client-go/kubernetes/fake" 16 k8sTesting "k8s.io/client-go/testing" 17 "k8s.io/client-go/util/workqueue" 18 19 "github.com/cilium/cilium/pkg/k8s" 20 slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1" 21 slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" 22 pkgOption "github.com/cilium/cilium/pkg/option" 23 "github.com/cilium/cilium/pkg/testutils" 24 ) 25 26 type fakeNodeGetter struct { 27 OnGetK8sSlimNode func(nodeName string) (*slim_corev1.Node, error) 28 } 29 30 func (f *fakeNodeGetter) GetK8sSlimNode(nodeName string) (*slim_corev1.Node, error) { 31 if f.OnGetK8sSlimNode != nil { 32 return f.OnGetK8sSlimNode(nodeName) 33 } 34 panic("OnGetK8sSlimNode called but not implemented!") 35 } 36 37 func (f *fakeNodeGetter) ListK8sSlimNode() []*slim_corev1.Node { 38 panic("not implemented!") 39 } 40 41 func TestNodeTaintWithoutCondition(t *testing.T) { 42 mno = markNodeOptions{ 43 RemoveNodeTaint: true, 44 SetNodeTaint: true, 45 SetCiliumIsUpCondition: false, 46 } 47 48 // create node1 with taint and without CiliumIsUp Condition 49 node1WithTaintWithoutCondition := &slim_corev1.Node{ 50 ObjectMeta: slim_metav1.ObjectMeta{ 51 Name: "k8s1", 52 }, 53 Spec: slim_corev1.NodeSpec{ 54 Taints: []slim_corev1.Taint{ 55 { 56 Key: pkgOption.Config.AgentNotReadyNodeTaintValue(), Value: "Foo", 57 }, 58 { 59 Key: "DoNoRemoveThisTaint", Value: "Foo", 60 }, 61 }, 62 }, 63 Status: slim_corev1.NodeStatus{ 64 Conditions: nil, 65 }, 66 } 67 68 ciliumPodOnNode1 := &slim_corev1.Pod{ 69 Spec: slim_corev1.PodSpec{ 70 NodeName: "k8s1", 71 }, 72 Status: slim_corev1.PodStatus{ 73 Conditions: []slim_corev1.PodCondition{ 74 { 75 Type: slim_corev1.PodReady, 76 Status: slim_corev1.ConditionTrue, 77 }, 78 }, 79 }, 80 } 81 82 // Add the cilium pod that is running on k8s1 83 err := ciliumPodsStore.Add(ciliumPodOnNode1) 84 require.NoError(t, err) 85 86 patchReceived := make(chan struct{}) 87 88 // Create a fake client to receive the patch from cilium-operator 89 fakeClient := &fake.Clientset{} 90 fakeClient.AddReactor("patch", "nodes", func(action k8sTesting.Action) (handled bool, ret runtime.Object, err error) { 91 // If we are updating the spec, the subresource should be empty. 92 // If we update the status the subresource is 'status' 93 require.Empty(t, action.GetSubresource()) 94 95 pa := action.(k8sTesting.PatchAction) 96 expectedJSONPatch := []k8s.JSONPatch{ 97 { 98 OP: "test", 99 Path: "/spec/taints", 100 Value: []slim_corev1.Taint{ 101 { 102 Key: pkgOption.Config.AgentNotReadyNodeTaintValue(), Value: "Foo", 103 }, 104 { 105 Key: "DoNoRemoveThisTaint", Value: "Foo", 106 }, 107 }, 108 }, 109 { 110 OP: "replace", 111 Path: "/spec/taints", 112 Value: []slim_corev1.Taint{ 113 { 114 Key: "DoNoRemoveThisTaint", Value: "Foo", 115 }, 116 }, 117 }, 118 } 119 expectedPatch, err := json.Marshal(expectedJSONPatch) 120 require.NoError(t, err) 121 require.Equal(t, expectedPatch, pa.GetPatch()) 122 123 patchReceived <- struct{}{} 124 return true, nil, nil 125 }) 126 127 fng := &fakeNodeGetter{ 128 OnGetK8sSlimNode: func(nodeName string) (*slim_corev1.Node, error) { 129 require.Equal(t, "k8s1", nodeName) 130 return node1WithTaintWithoutCondition, nil 131 }, 132 } 133 134 nodeQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node-queue") 135 136 key, err := queueKeyFunc(node1WithTaintWithoutCondition) 137 require.NoError(t, err) 138 139 nodeQueue.Add(key) 140 141 continueProcess := checkTaintForNextNodeItem(fakeClient, fng, nodeQueue) 142 require.True(t, continueProcess) 143 144 err = testutils.WaitUntil(func() bool { 145 select { 146 case <-patchReceived: 147 return true 148 default: 149 return false 150 } 151 }, 1*time.Second) 152 require.NoError(t, err, "Patch was never received by k8s fake client") 153 154 // Test if we create the same patch if we receive an event from Cilium pods 155 ciliumPodQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cilium-pod-queue") 156 157 key, err = queueKeyFunc(ciliumPodOnNode1) 158 require.NoError(t, err) 159 160 ciliumPodQueue.Add(key) 161 162 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 163 require.True(t, continueProcess) 164 165 err = testutils.WaitUntil(func() bool { 166 select { 167 case <-patchReceived: 168 return true 169 default: 170 return false 171 } 172 }, 1*time.Second) 173 require.NoError(t, err, "Patch was never received by k8s fake client") 174 } 175 176 func TestNodeCondition(t *testing.T) { 177 mno = markNodeOptions{ 178 RemoveNodeTaint: false, 179 SetNodeTaint: false, 180 SetCiliumIsUpCondition: true, 181 } 182 183 // create node1 with taint and with CiliumIsUp Condition. The taint 184 // shouldn't be removed because we have marked it as 'false' in the 185 // markNodeOptions 186 node1WithTaintWithoutCondition := &slim_corev1.Node{ 187 ObjectMeta: slim_metav1.ObjectMeta{ 188 Name: "k8s1", 189 }, 190 Spec: slim_corev1.NodeSpec{ 191 Taints: []slim_corev1.Taint{ 192 { 193 Key: pkgOption.Config.AgentNotReadyNodeTaintValue(), Value: "Foo", 194 }, 195 { 196 Key: "DoNoRemoveThisTaint", Value: "Foo", 197 }, 198 }, 199 }, 200 Status: slim_corev1.NodeStatus{ 201 Conditions: nil, 202 }, 203 } 204 205 ciliumPodOnNode1 := &slim_corev1.Pod{ 206 Spec: slim_corev1.PodSpec{ 207 NodeName: "k8s1", 208 }, 209 Status: slim_corev1.PodStatus{ 210 Conditions: []slim_corev1.PodCondition{ 211 { 212 Type: slim_corev1.PodReady, 213 Status: slim_corev1.ConditionTrue, 214 }, 215 }, 216 }, 217 } 218 219 // Add the cilium pod that is running on k8s1 220 err := ciliumPodsStore.Add(ciliumPodOnNode1) 221 require.NoError(t, err) 222 223 patchReceived := make(chan struct{}) 224 225 // Create a fake client to receive the patch from cilium-operator 226 fakeClient := &fake.Clientset{} 227 fakeClient.AddReactor("patch", "nodes", func(action k8sTesting.Action) (handled bool, ret runtime.Object, err error) { 228 // If we are updating the spec, the subresource should be empty. 229 // If we update the status the subresource is 'status' 230 require.Equal(t, action.GetSubresource(), "status") 231 232 pa := action.(k8sTesting.PatchAction) 233 expectedPatch := map[string]map[string][]corev1.NodeCondition{ 234 "status": { 235 "conditions": []corev1.NodeCondition{ 236 { 237 Type: corev1.NodeNetworkUnavailable, 238 Status: corev1.ConditionFalse, 239 Reason: ciliumNodeConditionReason, 240 Message: "Cilium is running on this node", 241 // Set a dummy time since we can't mock time.Now() 242 LastTransitionTime: metav1.NewTime(time.Time{}), 243 LastHeartbeatTime: metav1.NewTime(time.Time{}), 244 }, 245 }, 246 }, 247 } 248 var receivedPatch map[string]map[string][]corev1.NodeCondition 249 err = json.Unmarshal(pa.GetPatch(), &receivedPatch) 250 require.NoError(t, err) 251 252 receivedPatch["status"]["conditions"][0].LastTransitionTime = metav1.NewTime(time.Time{}) 253 receivedPatch["status"]["conditions"][0].LastHeartbeatTime = metav1.NewTime(time.Time{}) 254 255 require.Equal(t, expectedPatch, receivedPatch) 256 257 patchReceived <- struct{}{} 258 return true, nil, nil 259 }) 260 261 fng := &fakeNodeGetter{ 262 OnGetK8sSlimNode: func(nodeName string) (*slim_corev1.Node, error) { 263 require.Equal(t, "k8s1", nodeName) 264 return node1WithTaintWithoutCondition, nil 265 }, 266 } 267 268 nodeQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node-queue") 269 270 key, err := queueKeyFunc(node1WithTaintWithoutCondition) 271 require.NoError(t, err) 272 273 nodeQueue.Add(key) 274 275 continueProcess := checkTaintForNextNodeItem(fakeClient, fng, nodeQueue) 276 require.True(t, continueProcess) 277 278 err = testutils.WaitUntil(func() bool { 279 select { 280 case <-patchReceived: 281 return true 282 default: 283 return false 284 } 285 }, 1*time.Second) 286 require.NoError(t, err, "Patch was never received by k8s fake client") 287 288 // Test if we create the same patch if we receive an event from Cilium pods 289 ciliumPodQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cilium-pod-queue") 290 291 key, err = queueKeyFunc(ciliumPodOnNode1) 292 require.NoError(t, err) 293 294 ciliumPodQueue.Add(key) 295 296 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 297 require.True(t, continueProcess) 298 299 err = testutils.WaitUntil(func() bool { 300 select { 301 case <-patchReceived: 302 return true 303 default: 304 return false 305 } 306 }, 1*time.Second) 307 require.NoError(t, err, "Patch was never received by k8s fake client") 308 } 309 310 func TestNodeConditionIfCiliumIsNotReady(t *testing.T) { 311 mno = markNodeOptions{ 312 RemoveNodeTaint: true, 313 SetNodeTaint: true, 314 SetCiliumIsUpCondition: true, 315 } 316 317 // create node1 with taint and with CiliumIsUp Condition. The taint 318 // shouldn't be removed because we have marked it as 'false' in the 319 // markNodeOptions 320 node1WithTaintWithoutCondition := &slim_corev1.Node{ 321 ObjectMeta: slim_metav1.ObjectMeta{ 322 Name: "k8s1", 323 }, 324 Spec: slim_corev1.NodeSpec{ 325 Taints: []slim_corev1.Taint{ 326 { 327 Key: pkgOption.Config.AgentNotReadyNodeTaintValue(), Value: "Foo", 328 }, 329 { 330 Key: "DoNoRemoveThisTaint", Value: "Foo", 331 }, 332 }, 333 }, 334 Status: slim_corev1.NodeStatus{ 335 Conditions: nil, 336 }, 337 } 338 339 // Cilium Pod is not ready thus we should not update the condition nor its 340 // node taint. 341 ciliumPodOnNode1 := &slim_corev1.Pod{ 342 Spec: slim_corev1.PodSpec{ 343 NodeName: "k8s1", 344 }, 345 Status: slim_corev1.PodStatus{ 346 Conditions: []slim_corev1.PodCondition{ 347 { 348 Type: slim_corev1.PodReady, 349 Status: slim_corev1.ConditionFalse, 350 }, 351 }, 352 }, 353 } 354 355 // Add the cilium pod that is running on k8s1 356 err := ciliumPodsStore.Add(ciliumPodOnNode1) 357 require.NoError(t, err) 358 359 patchReceived := make(chan struct{}) 360 361 // Create a fake client to receive the patch from cilium-operator 362 fakeClient := &fake.Clientset{} 363 fakeClient.AddReactor("*", "*", func(action k8sTesting.Action) (handled bool, ret runtime.Object, err error) { 364 patchReceived <- struct{}{} 365 return true, nil, nil 366 }) 367 368 fng := &fakeNodeGetter{ 369 OnGetK8sSlimNode: func(nodeName string) (*slim_corev1.Node, error) { 370 require.Equal(t, "k8s1", nodeName) 371 return node1WithTaintWithoutCondition, nil 372 }, 373 } 374 375 nodeQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node-queue") 376 377 key, err := queueKeyFunc(node1WithTaintWithoutCondition) 378 require.NoError(t, err) 379 380 nodeQueue.Add(key) 381 382 continueProcess := checkTaintForNextNodeItem(fakeClient, fng, nodeQueue) 383 require.True(t, continueProcess) 384 385 err = testutils.WaitUntil(func() bool { 386 select { 387 case <-patchReceived: 388 return true 389 default: 390 return false 391 } 392 }, 1*time.Second) 393 require.Error(t, err, "Something was sent to kube-apiserver and it shouldn't have been") 394 395 // Test if we create the same patch if we receive an event from Cilium pods 396 ciliumPodQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cilium-pod-queue") 397 398 key, err = queueKeyFunc(ciliumPodOnNode1) 399 require.NoError(t, err) 400 401 ciliumPodQueue.Add(key) 402 403 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 404 require.True(t, continueProcess) 405 406 err = testutils.WaitUntil(func() bool { 407 select { 408 case <-patchReceived: 409 return true 410 default: 411 return false 412 } 413 }, 1*time.Second) 414 require.Error(t, err, "Something was sent to kube-apiserver and it shouldn't have been") 415 } 416 417 func TestNodeConditionIfCiliumAndNodeAreReady(t *testing.T) { 418 mno = markNodeOptions{ 419 RemoveNodeTaint: true, 420 SetCiliumIsUpCondition: true, 421 SetNodeTaint: false, // we don't test _setting_ node taints here, just because it's unergonomic 422 } 423 424 // create node1 with a taint and with CiliumIsUp Condition. 425 node1WithTaintWithoutCondition := &slim_corev1.Node{ 426 ObjectMeta: slim_metav1.ObjectMeta{ 427 Name: "k8s1", 428 }, 429 Spec: slim_corev1.NodeSpec{ 430 Taints: []slim_corev1.Taint{ 431 { 432 Key: "DoNoRemoveThisTaint", Value: "Foo", 433 }, 434 }, 435 }, 436 Status: slim_corev1.NodeStatus{ 437 Conditions: []slim_corev1.NodeCondition{ 438 { 439 Type: slim_corev1.NodeNetworkUnavailable, 440 Status: slim_corev1.ConditionFalse, 441 Reason: ciliumNodeConditionReason, 442 }, 443 }, 444 }, 445 } 446 447 // Cilium Pod is ready 448 ciliumPodOnNode1 := &slim_corev1.Pod{ 449 Spec: slim_corev1.PodSpec{ 450 NodeName: "k8s1", 451 }, 452 Status: slim_corev1.PodStatus{ 453 Conditions: []slim_corev1.PodCondition{ 454 { 455 Type: slim_corev1.PodReady, 456 Status: slim_corev1.ConditionFalse, 457 }, 458 }, 459 }, 460 } 461 462 // Add the cilium pod that is running on k8s1 463 err := ciliumPodsStore.Add(ciliumPodOnNode1) 464 require.NoError(t, err) 465 466 patchReceived := make(chan struct{}) 467 468 // Create a fake client to receive the patch from cilium-operator 469 fakeClient := &fake.Clientset{} 470 fakeClient.AddReactor("*", "*", func(action k8sTesting.Action) (handled bool, ret runtime.Object, err error) { 471 patchReceived <- struct{}{} 472 return true, nil, nil 473 }) 474 475 fng := &fakeNodeGetter{ 476 OnGetK8sSlimNode: func(nodeName string) (*slim_corev1.Node, error) { 477 require.Equal(t, "k8s1", nodeName) 478 return node1WithTaintWithoutCondition, nil 479 }, 480 } 481 482 nodeQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node-queue") 483 484 key, err := queueKeyFunc(node1WithTaintWithoutCondition) 485 require.NoError(t, err) 486 487 nodeQueue.Add(key) 488 489 continueProcess := checkTaintForNextNodeItem(fakeClient, fng, nodeQueue) 490 require.True(t, continueProcess) 491 492 err = testutils.WaitUntil(func() bool { 493 select { 494 case <-patchReceived: 495 return true 496 default: 497 return false 498 } 499 }, 1*time.Second) 500 require.Error(t, err, "Something was sent to kube-apiserver and it shouldn't have been") 501 502 // Test if we don't send any patch because the node and cilium pods are ready 503 ciliumPodQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cilium-pod-queue") 504 505 key, err = queueKeyFunc(ciliumPodOnNode1) 506 require.NoError(t, err) 507 508 ciliumPodQueue.Add(key) 509 510 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 511 require.True(t, continueProcess) 512 513 err = testutils.WaitUntil(func() bool { 514 select { 515 case <-patchReceived: 516 return true 517 default: 518 return false 519 } 520 }, 1*time.Second) 521 require.Error(t, err, "Something was sent to kube-apiserver and it shouldn't have been") 522 } 523 524 // TestTaintNodeCiliumDown checks that taints are correctly managed on nodes as Cilium 525 // pods go up and down. 526 func TestTaintNodeCiliumDown(t *testing.T) { 527 mno = markNodeOptions{ 528 RemoveNodeTaint: true, 529 SetCiliumIsUpCondition: false, 530 SetNodeTaint: true, 531 } 532 533 // create node1 with an unrelated taint 534 node1 := &slim_corev1.Node{ 535 ObjectMeta: slim_metav1.ObjectMeta{ 536 Name: "k8s1", 537 }, 538 Spec: slim_corev1.NodeSpec{ 539 Taints: []slim_corev1.Taint{ 540 { 541 Key: "DoNoRemoveThisTaint", Value: "Foo", 542 }, 543 }, 544 }, 545 } 546 547 // Cilium Pod is not ready 548 ciliumPodOnNode1 := &slim_corev1.Pod{ 549 Spec: slim_corev1.PodSpec{ 550 NodeName: "k8s1", 551 }, 552 Status: slim_corev1.PodStatus{ 553 Conditions: []slim_corev1.PodCondition{ 554 { 555 Type: slim_corev1.PodReady, 556 Status: slim_corev1.ConditionFalse, 557 }, 558 }, 559 }, 560 } 561 562 // Add the cilium pod that is running on k8s1 563 err := ciliumPodsStore.Add(ciliumPodOnNode1) 564 require.NoError(t, err) 565 566 // Create a fake client to receive the patch from cilium-operator 567 fakeClient := &fake.Clientset{} 568 569 patchReceived := make(chan bool) 570 // emit a true on the patchReceived chan if a patch comes where the taint is set 571 // false if the taint is not set 572 fakeClient.AddReactor("patch", "nodes", func(action k8sTesting.Action) (handled bool, ret runtime.Object, err error) { 573 // If we are updating the spec, the subresource should be empty. 574 // If we update the status the subresource is 'status' 575 require.Empty(t, action.GetSubresource()) 576 577 pa := action.(k8sTesting.PatchAction) 578 579 patches := []struct { 580 OP string `json:"op,omitempty"` 581 Path string `json:"path,omitempty"` 582 Value []slim_corev1.Taint `json:"value"` 583 }{} 584 err = json.Unmarshal(pa.GetPatch(), &patches) 585 require.NoError(t, err) 586 require.Len(t, patches, 2) 587 588 patch := patches[1] 589 require.Equal(t, patch.OP, "replace") 590 require.Equal(t, patch.Path, "/spec/taints") 591 592 // Check to see if our taint is included 593 for _, taint := range patch.Value { 594 if taint.Key == pkgOption.Config.AgentNotReadyNodeTaintValue() { 595 patchReceived <- true 596 return true, nil, nil 597 } 598 } 599 600 patchReceived <- false 601 return true, nil, nil 602 }) 603 604 fng := &fakeNodeGetter{ 605 OnGetK8sSlimNode: func(nodeName string) (*slim_corev1.Node, error) { 606 require.Equal(t, "k8s1", nodeName) 607 return node1, nil 608 }, 609 } 610 611 ciliumPodQueue := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "cilium-pod-queue") 612 613 // Trigger the watcher with 614 // - node taint: not set 615 // - pod: scheduled, not ready 616 key, err := queueKeyFunc(ciliumPodOnNode1) 617 require.NoError(t, err) 618 ciliumPodQueue.Add(key) 619 continueProcess := processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 620 require.True(t, continueProcess) 621 622 // Ensure taint was set 623 taintSet := false 624 err = testutils.WaitUntil(func() bool { 625 select { 626 case p := <-patchReceived: 627 taintSet = p 628 return true 629 default: 630 return false 631 } 632 }, 1*time.Second) 633 require.NoError(t, err) 634 require.True(t, taintSet, "NotReady Pod should cause node taint to be set") 635 636 node1.Spec.Taints = []slim_corev1.Taint{ 637 { 638 Key: pkgOption.Config.AgentNotReadyNodeTaintValue(), Value: "Foo", 639 }, 640 { 641 Key: "DoNoRemoveThisTaint", Value: "Foo", 642 }, 643 } 644 645 // Re-trigger pod; ensure no patch is received, 646 ciliumPodQueue.Add(key) 647 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 648 require.True(t, continueProcess) 649 650 err = testutils.WaitUntil(func() bool { 651 select { 652 case <-patchReceived: 653 return true 654 default: 655 return false 656 } 657 }, 1*time.Second) 658 require.Error(t, err, "no patch should have been received; code should short-circuit") 659 660 // Set pod to Ready, ensure taint is removed 661 ciliumPodOnNode1.Status.Conditions[0].Status = slim_corev1.ConditionTrue 662 ciliumPodQueue.Add(key) 663 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 664 require.True(t, continueProcess) 665 err = testutils.WaitUntil(func() bool { 666 select { 667 case p := <-patchReceived: 668 taintSet = p 669 return true 670 default: 671 return false 672 } 673 }, 1*time.Second) 674 require.NoError(t, err) 675 require.False(t, taintSet, "Ready Pod should cause node taint to be removed") 676 677 // Re-trigger pod; ensure no patch is received, 678 node1.Spec.Taints = []slim_corev1.Taint{node1.Spec.Taints[1]} 679 ciliumPodQueue.Add(key) 680 continueProcess = processNextCiliumPodItem(fakeClient, fng, ciliumPodQueue) 681 require.True(t, continueProcess) 682 683 err = testutils.WaitUntil(func() bool { 684 select { 685 case <-patchReceived: 686 return true 687 default: 688 return false 689 } 690 }, 1*time.Second) 691 require.Error(t, err, "no patch should have been received; code should short-circuit") 692 }