k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/tainteviction/taint_eviction_test.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tainteviction
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	goruntime "runtime"
    23  	"sort"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/google/go-cmp/cmp"
    28  
    29  	corev1 "k8s.io/api/core/v1"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/fields"
    32  	"k8s.io/apimachinery/pkg/labels"
    33  	"k8s.io/apimachinery/pkg/types"
    34  	"k8s.io/apimachinery/pkg/util/wait"
    35  	"k8s.io/apiserver/pkg/util/feature"
    36  	"k8s.io/client-go/informers"
    37  	"k8s.io/client-go/kubernetes/fake"
    38  	clienttesting "k8s.io/client-go/testing"
    39  	"k8s.io/client-go/tools/cache"
    40  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    41  	"k8s.io/kubernetes/pkg/controller/testutil"
    42  	"k8s.io/kubernetes/pkg/features"
    43  )
    44  
// timeForControllerToProgressForSanityCheck is a short pause that gives the
// controller goroutine a chance to run; TestDeletePod sleeps for this long
// only to check that nothing panics.
var timeForControllerToProgressForSanityCheck = 20 * time.Millisecond
    46  
    47  func getPodsAssignedToNode(ctx context.Context, c *fake.Clientset) GetPodsByNodeNameFunc {
    48  	return func(nodeName string) ([]*corev1.Pod, error) {
    49  		selector := fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName})
    50  		pods, err := c.CoreV1().Pods(corev1.NamespaceAll).List(ctx, metav1.ListOptions{
    51  			FieldSelector: selector.String(),
    52  			LabelSelector: labels.Everything().String(),
    53  		})
    54  		if err != nil {
    55  			return []*corev1.Pod{}, fmt.Errorf("failed to get Pods assigned to node %v", nodeName)
    56  		}
    57  		rPods := make([]*corev1.Pod, len(pods.Items))
    58  		for i := range pods.Items {
    59  			rPods[i] = &pods.Items[i]
    60  		}
    61  		return rPods, nil
    62  	}
    63  }
    64  
    65  func createNoExecuteTaint(index int) corev1.Taint {
    66  	now := metav1.Now()
    67  	return corev1.Taint{
    68  		Key:       "testTaint" + fmt.Sprintf("%v", index),
    69  		Value:     "test" + fmt.Sprintf("%v", index),
    70  		Effect:    corev1.TaintEffectNoExecute,
    71  		TimeAdded: &now,
    72  	}
    73  }
    74  
    75  func addToleration(pod *corev1.Pod, index int, duration int64) *corev1.Pod {
    76  	if pod.Annotations == nil {
    77  		pod.Annotations = map[string]string{}
    78  	}
    79  	if duration < 0 {
    80  		pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute}}
    81  
    82  	} else {
    83  		pod.Spec.Tolerations = []corev1.Toleration{{Key: "testTaint" + fmt.Sprintf("%v", index), Value: "test" + fmt.Sprintf("%v", index), Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &duration}}
    84  	}
    85  	return pod
    86  }
    87  
    88  func addTaintsToNode(node *corev1.Node, key, value string, indices []int) *corev1.Node {
    89  	taints := []corev1.Taint{}
    90  	for _, index := range indices {
    91  		taints = append(taints, createNoExecuteTaint(index))
    92  	}
    93  	node.Spec.Taints = taints
    94  	return node
    95  }
    96  
// alwaysReady always reports true; setupNewController installs it as the
// controller's podListerSynced and nodeListerSynced functions.
var alwaysReady = func() bool { return true }
    98  
    99  func setupNewController(ctx context.Context, fakeClientSet *fake.Clientset) (*Controller, cache.Indexer, cache.Indexer) {
   100  	informerFactory := informers.NewSharedInformerFactory(fakeClientSet, 0)
   101  	podIndexer := informerFactory.Core().V1().Pods().Informer().GetIndexer()
   102  	nodeIndexer := informerFactory.Core().V1().Nodes().Informer().GetIndexer()
   103  	mgr, _ := New(ctx, fakeClientSet, informerFactory.Core().V1().Pods(), informerFactory.Core().V1().Nodes(), "taint-eviction-controller")
   104  	mgr.podListerSynced = alwaysReady
   105  	mgr.nodeListerSynced = alwaysReady
   106  	mgr.getPodsAssignedToNode = getPodsAssignedToNode(ctx, fakeClientSet)
   107  	return mgr, podIndexer, nodeIndexer
   108  }
   109  
   110  type timestampedPod struct {
   111  	names     []string
   112  	timestamp time.Duration
   113  }
   114  
   115  type durationSlice []timestampedPod
   116  
   117  func (a durationSlice) Len() int           { return len(a) }
   118  func (a durationSlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
   119  func (a durationSlice) Less(i, j int) bool { return a[i].timestamp < a[j].timestamp }
   120  
   121  func TestFilterNoExecuteTaints(t *testing.T) {
   122  	taints := []corev1.Taint{
   123  		{
   124  			Key:    "one",
   125  			Value:  "one",
   126  			Effect: corev1.TaintEffectNoExecute,
   127  		},
   128  		{
   129  			Key:    "two",
   130  			Value:  "two",
   131  			Effect: corev1.TaintEffectNoSchedule,
   132  		},
   133  	}
   134  	taints = getNoExecuteTaints(taints)
   135  	if len(taints) != 1 || taints[0].Key != "one" {
   136  		t.Errorf("Filtering doesn't work. Got %v", taints)
   137  	}
   138  }
   139  
   140  func TestCreatePod(t *testing.T) {
   141  	testCases := []struct {
   142  		description                   string
   143  		pod                           *corev1.Pod
   144  		taintedNodes                  map[string][]corev1.Taint
   145  		expectPatch                   bool
   146  		expectDelete                  bool
   147  		enablePodDisruptionConditions bool
   148  	}{
   149  		{
   150  			description:  "not scheduled - ignore",
   151  			pod:          testutil.NewPod("pod1", ""),
   152  			taintedNodes: map[string][]corev1.Taint{},
   153  			expectDelete: false,
   154  		},
   155  		{
   156  			description:  "scheduled on untainted Node",
   157  			pod:          testutil.NewPod("pod1", "node1"),
   158  			taintedNodes: map[string][]corev1.Taint{},
   159  			expectDelete: false,
   160  		},
   161  		{
   162  			description: "schedule on tainted Node",
   163  			pod:         testutil.NewPod("pod1", "node1"),
   164  			taintedNodes: map[string][]corev1.Taint{
   165  				"node1": {createNoExecuteTaint(1)},
   166  			},
   167  			expectDelete: true,
   168  		},
   169  		{
   170  			description: "schedule on tainted Node; PodDisruptionConditions enabled",
   171  			pod:         testutil.NewPod("pod1", "node1"),
   172  			taintedNodes: map[string][]corev1.Taint{
   173  				"node1": {createNoExecuteTaint(1)},
   174  			},
   175  			expectPatch:                   true,
   176  			expectDelete:                  true,
   177  			enablePodDisruptionConditions: true,
   178  		},
   179  		{
   180  			description: "schedule on tainted Node with finite toleration",
   181  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   182  			taintedNodes: map[string][]corev1.Taint{
   183  				"node1": {createNoExecuteTaint(1)},
   184  			},
   185  			expectDelete: false,
   186  		},
   187  		{
   188  			description: "schedule on tainted Node with infinite toleration",
   189  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   190  			taintedNodes: map[string][]corev1.Taint{
   191  				"node1": {createNoExecuteTaint(1)},
   192  			},
   193  			expectDelete: false,
   194  		},
   195  		{
   196  			description: "schedule on tainted Node with infinite ivalid toleration",
   197  			pod:         addToleration(testutil.NewPod("pod1", "node1"), 2, -1),
   198  			taintedNodes: map[string][]corev1.Taint{
   199  				"node1": {createNoExecuteTaint(1)},
   200  			},
   201  			expectDelete: true,
   202  		},
   203  	}
   204  
   205  	for _, item := range testCases {
   206  		t.Run(item.description, func(t *testing.T) {
   207  			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)
   208  			ctx, cancel := context.WithCancel(context.Background())
   209  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.pod}})
   210  			controller, podIndexer, _ := setupNewController(ctx, fakeClientset)
   211  			controller.recorder = testutil.NewFakeRecorder()
   212  			go controller.Run(ctx)
   213  			controller.taintedNodes = item.taintedNodes
   214  
   215  			podIndexer.Add(item.pod)
   216  			controller.PodUpdated(nil, item.pod)
   217  
   218  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   219  
   220  			cancel()
   221  		})
   222  	}
   223  }
   224  
   225  func TestDeletePod(t *testing.T) {
   226  	ctx, cancel := context.WithCancel(context.Background())
   227  	defer cancel()
   228  
   229  	fakeClientset := fake.NewSimpleClientset()
   230  	controller, _, _ := setupNewController(ctx, fakeClientset)
   231  	controller.recorder = testutil.NewFakeRecorder()
   232  	go controller.Run(ctx)
   233  	controller.taintedNodes = map[string][]corev1.Taint{
   234  		"node1": {createNoExecuteTaint(1)},
   235  	}
   236  	controller.PodUpdated(testutil.NewPod("pod1", "node1"), nil)
   237  	// wait a bit to see if nothing will panic
   238  	time.Sleep(timeForControllerToProgressForSanityCheck)
   239  }
   240  
   241  func TestUpdatePod(t *testing.T) {
   242  	testCases := []struct {
   243  		description                   string
   244  		prevPod                       *corev1.Pod
   245  		awaitForScheduledEviction     bool
   246  		newPod                        *corev1.Pod
   247  		taintedNodes                  map[string][]corev1.Taint
   248  		expectPatch                   bool
   249  		expectDelete                  bool
   250  		enablePodDisruptionConditions bool
   251  		skipOnWindows                 bool
   252  	}{
   253  		{
   254  			description: "scheduling onto tainted Node results in patch and delete when PodDisruptionConditions enabled",
   255  			prevPod:     testutil.NewPod("pod1", ""),
   256  			newPod:      testutil.NewPod("pod1", "node1"),
   257  			taintedNodes: map[string][]corev1.Taint{
   258  				"node1": {createNoExecuteTaint(1)},
   259  			},
   260  			expectPatch:                   true,
   261  			expectDelete:                  true,
   262  			enablePodDisruptionConditions: true,
   263  		},
   264  		{
   265  			description: "scheduling onto tainted Node",
   266  			prevPod:     testutil.NewPod("pod1", ""),
   267  			newPod:      testutil.NewPod("pod1", "node1"),
   268  			taintedNodes: map[string][]corev1.Taint{
   269  				"node1": {createNoExecuteTaint(1)},
   270  			},
   271  			expectDelete: true,
   272  		},
   273  		{
   274  			description: "scheduling onto tainted Node with toleration",
   275  			prevPod:     addToleration(testutil.NewPod("pod1", ""), 1, -1),
   276  			newPod:      addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   277  			taintedNodes: map[string][]corev1.Taint{
   278  				"node1": {createNoExecuteTaint(1)},
   279  			},
   280  			expectDelete: false,
   281  		},
   282  		{
   283  			description:               "removing toleration",
   284  			prevPod:                   addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   285  			newPod:                    testutil.NewPod("pod1", "node1"),
   286  			awaitForScheduledEviction: true,
   287  			taintedNodes: map[string][]corev1.Taint{
   288  				"node1": {createNoExecuteTaint(1)},
   289  			},
   290  			expectDelete: true,
   291  		},
   292  		{
   293  			description:               "lengthening toleration shouldn't work",
   294  			prevPod:                   addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
   295  			newPod:                    addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   296  			awaitForScheduledEviction: true,
   297  			taintedNodes: map[string][]corev1.Taint{
   298  				"node1": {createNoExecuteTaint(1)},
   299  			},
   300  			expectDelete:  true,
   301  			skipOnWindows: true,
   302  		},
   303  	}
   304  
   305  	for _, item := range testCases {
   306  		t.Run(item.description, func(t *testing.T) {
   307  			if item.skipOnWindows && goruntime.GOOS == "windows" {
   308  				// TODO: remove skip once the flaking test has been fixed.
   309  				t.Skip("Skip flaking test on Windows.")
   310  			}
   311  			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)
   312  			ctx, cancel := context.WithCancel(context.Background())
   313  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: []corev1.Pod{*item.prevPod}})
   314  			controller, podIndexer, _ := setupNewController(context.TODO(), fakeClientset)
   315  			controller.recorder = testutil.NewFakeRecorder()
   316  			controller.taintedNodes = item.taintedNodes
   317  			go controller.Run(ctx)
   318  
   319  			podIndexer.Add(item.prevPod)
   320  			controller.PodUpdated(nil, item.prevPod)
   321  
   322  			if item.awaitForScheduledEviction {
   323  				nsName := types.NamespacedName{Namespace: item.prevPod.Namespace, Name: item.prevPod.Name}
   324  				err := wait.PollImmediate(time.Millisecond*10, time.Second, func() (bool, error) {
   325  					scheduledEviction := controller.taintEvictionQueue.GetWorkerUnsafe(nsName.String())
   326  					return scheduledEviction != nil, nil
   327  				})
   328  				if err != nil {
   329  					t.Fatalf("Failed to await for scheduled eviction: %q", err)
   330  				}
   331  			}
   332  
   333  			podIndexer.Update(item.newPod)
   334  			controller.PodUpdated(item.prevPod, item.newPod)
   335  
   336  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   337  			cancel()
   338  		})
   339  	}
   340  }
   341  
   342  func TestCreateNode(t *testing.T) {
   343  	testCases := []struct {
   344  		description  string
   345  		pods         []corev1.Pod
   346  		node         *corev1.Node
   347  		expectPatch  bool
   348  		expectDelete bool
   349  	}{
   350  		{
   351  			description: "Creating Node matching already assigned Pod",
   352  			pods: []corev1.Pod{
   353  				*testutil.NewPod("pod1", "node1"),
   354  			},
   355  			node:         testutil.NewNode("node1"),
   356  			expectPatch:  false,
   357  			expectDelete: false,
   358  		},
   359  		{
   360  			description: "Creating tainted Node matching already assigned Pod",
   361  			pods: []corev1.Pod{
   362  				*testutil.NewPod("pod1", "node1"),
   363  			},
   364  			node:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   365  			expectPatch:  true,
   366  			expectDelete: true,
   367  		},
   368  		{
   369  			description: "Creating tainted Node matching already assigned tolerating Pod",
   370  			pods: []corev1.Pod{
   371  				*addToleration(testutil.NewPod("pod1", "node1"), 1, -1),
   372  			},
   373  			node:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   374  			expectPatch:  false,
   375  			expectDelete: false,
   376  		},
   377  	}
   378  
   379  	for _, item := range testCases {
   380  		ctx, cancel := context.WithCancel(context.Background())
   381  		fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   382  		controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   383  		nodeIndexer.Add(item.node)
   384  		controller.recorder = testutil.NewFakeRecorder()
   385  		go controller.Run(ctx)
   386  		controller.NodeUpdated(nil, item.node)
   387  
   388  		verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   389  
   390  		cancel()
   391  	}
   392  }
   393  
   394  func TestDeleteNode(t *testing.T) {
   395  	ctx, cancel := context.WithCancel(context.Background())
   396  	fakeClientset := fake.NewSimpleClientset()
   397  	controller, _, _ := setupNewController(ctx, fakeClientset)
   398  	controller.recorder = testutil.NewFakeRecorder()
   399  	controller.taintedNodes = map[string][]corev1.Taint{
   400  		"node1": {createNoExecuteTaint(1)},
   401  	}
   402  	go controller.Run(ctx)
   403  	controller.NodeUpdated(testutil.NewNode("node1"), nil)
   404  
   405  	// await until controller.taintedNodes is empty
   406  	err := wait.PollImmediate(10*time.Millisecond, time.Second, func() (bool, error) {
   407  		controller.taintedNodesLock.Lock()
   408  		defer controller.taintedNodesLock.Unlock()
   409  		_, ok := controller.taintedNodes["node1"]
   410  		return !ok, nil
   411  	})
   412  	if err != nil {
   413  		t.Errorf("Failed to await for processing node deleted: %q", err)
   414  	}
   415  	cancel()
   416  }
   417  
   418  func TestUpdateNode(t *testing.T) {
   419  	testCases := []struct {
   420  		description                   string
   421  		pods                          []corev1.Pod
   422  		oldNode                       *corev1.Node
   423  		newNode                       *corev1.Node
   424  		expectPatch                   bool
   425  		expectDelete                  bool
   426  		additionalSleep               time.Duration
   427  		enablePodDisruptionConditions bool
   428  	}{
   429  		{
   430  			description: "Added taint, expect node patched and deleted when PodDisruptionConditions is enabled",
   431  			pods: []corev1.Pod{
   432  				*testutil.NewPod("pod1", "node1"),
   433  			},
   434  			oldNode:                       testutil.NewNode("node1"),
   435  			newNode:                       addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   436  			expectPatch:                   true,
   437  			expectDelete:                  true,
   438  			enablePodDisruptionConditions: true,
   439  		},
   440  		{
   441  			description: "Added taint",
   442  			pods: []corev1.Pod{
   443  				*testutil.NewPod("pod1", "node1"),
   444  			},
   445  			oldNode:      testutil.NewNode("node1"),
   446  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   447  			expectDelete: true,
   448  		},
   449  		{
   450  			description: "Added tolerated taint",
   451  			pods: []corev1.Pod{
   452  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   453  			},
   454  			oldNode:      testutil.NewNode("node1"),
   455  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   456  			expectDelete: false,
   457  		},
   458  		{
   459  			description: "Only one added taint tolerated",
   460  			pods: []corev1.Pod{
   461  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 100),
   462  			},
   463  			oldNode:      testutil.NewNode("node1"),
   464  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   465  			expectDelete: true,
   466  		},
   467  		{
   468  			description: "Taint removed",
   469  			pods: []corev1.Pod{
   470  				*addToleration(testutil.NewPod("pod1", "node1"), 1, 1),
   471  			},
   472  			oldNode:         addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   473  			newNode:         testutil.NewNode("node1"),
   474  			expectDelete:    false,
   475  			additionalSleep: 1500 * time.Millisecond,
   476  		},
   477  		{
   478  			description: "Pod with multiple tolerations are evicted when first one runs out",
   479  			pods: []corev1.Pod{
   480  				{
   481  					ObjectMeta: metav1.ObjectMeta{
   482  						Namespace: "default",
   483  						Name:      "pod1",
   484  					},
   485  					Spec: corev1.PodSpec{
   486  						NodeName: "node1",
   487  						Tolerations: []corev1.Toleration{
   488  							{Key: "testTaint1", Value: "test1", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{1}[0]},
   489  							{Key: "testTaint2", Value: "test2", Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &[]int64{100}[0]},
   490  						},
   491  					},
   492  					Status: corev1.PodStatus{
   493  						Conditions: []corev1.PodCondition{
   494  							{
   495  								Type:   corev1.PodReady,
   496  								Status: corev1.ConditionTrue,
   497  							},
   498  						},
   499  					},
   500  				},
   501  			},
   502  			oldNode:      testutil.NewNode("node1"),
   503  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   504  			expectDelete: true,
   505  		},
   506  	}
   507  
   508  	for _, item := range testCases {
   509  		t.Run(item.description, func(t *testing.T) {
   510  			featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.PodDisruptionConditions, item.enablePodDisruptionConditions)
   511  			ctx, cancel := context.WithCancel(context.Background())
   512  			defer cancel()
   513  
   514  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   515  			controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   516  			nodeIndexer.Add(item.newNode)
   517  			controller.recorder = testutil.NewFakeRecorder()
   518  			go controller.Run(ctx)
   519  			controller.NodeUpdated(item.oldNode, item.newNode)
   520  
   521  			if item.additionalSleep > 0 {
   522  				time.Sleep(item.additionalSleep)
   523  			}
   524  
   525  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   526  		})
   527  	}
   528  }
   529  
   530  func TestUpdateNodeWithMultipleTaints(t *testing.T) {
   531  	taint1 := createNoExecuteTaint(1)
   532  	taint2 := createNoExecuteTaint(2)
   533  
   534  	minute := int64(60)
   535  	pod := testutil.NewPod("pod1", "node1")
   536  	pod.Spec.Tolerations = []corev1.Toleration{
   537  		{Key: taint1.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute},
   538  		{Key: taint2.Key, Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute, TolerationSeconds: &minute},
   539  	}
   540  	podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
   541  
   542  	untaintedNode := testutil.NewNode("node1")
   543  
   544  	doubleTaintedNode := testutil.NewNode("node1")
   545  	doubleTaintedNode.Spec.Taints = []corev1.Taint{taint1, taint2}
   546  
   547  	singleTaintedNode := testutil.NewNode("node1")
   548  	singleTaintedNode.Spec.Taints = []corev1.Taint{taint1}
   549  
   550  	ctx, cancel := context.WithCancel(context.TODO())
   551  	fakeClientset := fake.NewSimpleClientset(pod)
   552  	controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   553  	controller.recorder = testutil.NewFakeRecorder()
   554  	go controller.Run(ctx)
   555  
   556  	// no taint
   557  	nodeIndexer.Add(untaintedNode)
   558  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   559  	// verify pod is not queued for deletion
   560  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   561  		t.Fatalf("pod queued for deletion with no taints")
   562  	}
   563  
   564  	// no taint -> infinitely tolerated taint
   565  	nodeIndexer.Update(singleTaintedNode)
   566  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   567  	// verify pod is not queued for deletion
   568  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   569  		t.Fatalf("pod queued for deletion with permanently tolerated taint")
   570  	}
   571  
   572  	// infinitely tolerated taint -> temporarily tolerated taint
   573  	nodeIndexer.Update(doubleTaintedNode)
   574  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   575  	// verify pod is queued for deletion
   576  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil {
   577  		t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint")
   578  	}
   579  
   580  	// temporarily tolerated taint -> infinitely tolerated taint
   581  	nodeIndexer.Update(singleTaintedNode)
   582  	controller.handleNodeUpdate(ctx, nodeUpdateItem{"node1"})
   583  	// verify pod is not queued for deletion
   584  	if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
   585  		t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint")
   586  	}
   587  
   588  	// verify pod is not deleted
   589  	for _, action := range fakeClientset.Actions() {
   590  		if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
   591  			t.Error("Unexpected deletion")
   592  		}
   593  	}
   594  	cancel()
   595  }
   596  
   597  func TestUpdateNodeWithMultiplePods(t *testing.T) {
   598  	testCases := []struct {
   599  		description         string
   600  		pods                []corev1.Pod
   601  		oldNode             *corev1.Node
   602  		newNode             *corev1.Node
   603  		expectedDeleteTimes durationSlice
   604  	}{
   605  		{
   606  			description: "Pods with different toleration times are evicted appropriately",
   607  			pods: []corev1.Pod{
   608  				*testutil.NewPod("pod1", "node1"),
   609  				*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
   610  				*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
   611  			},
   612  			oldNode: testutil.NewNode("node1"),
   613  			newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   614  			expectedDeleteTimes: durationSlice{
   615  				{[]string{"pod1"}, 0},
   616  				{[]string{"pod2"}, time.Second},
   617  			},
   618  		},
   619  		{
   620  			description: "Evict all pods not matching all taints instantly",
   621  			pods: []corev1.Pod{
   622  				*testutil.NewPod("pod1", "node1"),
   623  				*addToleration(testutil.NewPod("pod2", "node1"), 1, 1),
   624  				*addToleration(testutil.NewPod("pod3", "node1"), 1, -1),
   625  			},
   626  			oldNode: testutil.NewNode("node1"),
   627  			newNode: addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1, 2}),
   628  			expectedDeleteTimes: durationSlice{
   629  				{[]string{"pod1", "pod2", "pod3"}, 0},
   630  			},
   631  		},
   632  	}
   633  
   634  	for _, item := range testCases {
   635  		t.Run(item.description, func(t *testing.T) {
   636  			t.Logf("Starting testcase %q", item.description)
   637  			ctx, cancel := context.WithCancel(context.Background())
   638  			defer cancel()
   639  
   640  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   641  			sort.Sort(item.expectedDeleteTimes)
   642  			controller, _, nodeIndexer := setupNewController(ctx, fakeClientset)
   643  			nodeIndexer.Add(item.newNode)
   644  			controller.recorder = testutil.NewFakeRecorder()
   645  			go controller.Run(ctx)
   646  			controller.NodeUpdated(item.oldNode, item.newNode)
   647  
   648  			startedAt := time.Now()
   649  			for i := range item.expectedDeleteTimes {
   650  				if i == 0 || item.expectedDeleteTimes[i-1].timestamp != item.expectedDeleteTimes[i].timestamp {
   651  					// compute a grace duration to give controller time to process updates. Choose big
   652  					// enough intervals in the test cases above to avoid flakes.
   653  					var increment time.Duration
   654  					if i == len(item.expectedDeleteTimes)-1 || item.expectedDeleteTimes[i+1].timestamp == item.expectedDeleteTimes[i].timestamp {
   655  						increment = 500 * time.Millisecond
   656  					} else {
   657  						increment = ((item.expectedDeleteTimes[i+1].timestamp - item.expectedDeleteTimes[i].timestamp) / time.Duration(2))
   658  					}
   659  
   660  					sleepTime := item.expectedDeleteTimes[i].timestamp - time.Since(startedAt) + increment
   661  					if sleepTime < 0 {
   662  						sleepTime = 0
   663  					}
   664  					t.Logf("Sleeping for %v", sleepTime)
   665  					time.Sleep(sleepTime)
   666  				}
   667  
   668  				for delay, podName := range item.expectedDeleteTimes[i].names {
   669  					deleted := false
   670  					for _, action := range fakeClientset.Actions() {
   671  						deleteAction, ok := action.(clienttesting.DeleteActionImpl)
   672  						if !ok {
   673  							t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
   674  							continue
   675  						}
   676  						if deleteAction.GetResource().Resource != "pods" {
   677  							continue
   678  						}
   679  						if podName == deleteAction.GetName() {
   680  							deleted = true
   681  						}
   682  					}
   683  					if !deleted {
   684  						t.Errorf("Failed to deleted pod %v after %v", podName, delay)
   685  					}
   686  				}
   687  				for _, action := range fakeClientset.Actions() {
   688  					deleteAction, ok := action.(clienttesting.DeleteActionImpl)
   689  					if !ok {
   690  						t.Logf("Found not-delete action with verb %v. Ignoring.", action.GetVerb())
   691  						continue
   692  					}
   693  					if deleteAction.GetResource().Resource != "pods" {
   694  						continue
   695  					}
   696  					deletedPodName := deleteAction.GetName()
   697  					expected := false
   698  					for _, podName := range item.expectedDeleteTimes[i].names {
   699  						if podName == deletedPodName {
   700  							expected = true
   701  						}
   702  					}
   703  					if !expected {
   704  						t.Errorf("Pod %v was deleted even though it shouldn't have", deletedPodName)
   705  					}
   706  				}
   707  				fakeClientset.ClearActions()
   708  			}
   709  		})
   710  	}
   711  }
   712  
   713  func TestGetMinTolerationTime(t *testing.T) {
   714  	one := int64(1)
   715  	two := int64(2)
   716  	oneSec := 1 * time.Second
   717  
   718  	tests := []struct {
   719  		tolerations []corev1.Toleration
   720  		expected    time.Duration
   721  	}{
   722  		{
   723  			tolerations: []corev1.Toleration{},
   724  			expected:    0,
   725  		},
   726  		{
   727  			tolerations: []corev1.Toleration{
   728  				{
   729  					TolerationSeconds: nil,
   730  				},
   731  			},
   732  			expected: -1,
   733  		},
   734  		{
   735  			tolerations: []corev1.Toleration{
   736  				{
   737  					TolerationSeconds: &one,
   738  				},
   739  				{
   740  					TolerationSeconds: &two,
   741  				},
   742  			},
   743  			expected: oneSec,
   744  		},
   745  
   746  		{
   747  			tolerations: []corev1.Toleration{
   748  				{
   749  					TolerationSeconds: &one,
   750  				},
   751  				{
   752  					TolerationSeconds: nil,
   753  				},
   754  			},
   755  			expected: oneSec,
   756  		},
   757  		{
   758  			tolerations: []corev1.Toleration{
   759  				{
   760  					TolerationSeconds: nil,
   761  				},
   762  				{
   763  					TolerationSeconds: &one,
   764  				},
   765  			},
   766  			expected: oneSec,
   767  		},
   768  	}
   769  
   770  	for _, test := range tests {
   771  		got := getMinTolerationTime(test.tolerations)
   772  		if got != test.expected {
   773  			t.Errorf("Incorrect min toleration time: got %v, expected %v", got, test.expected)
   774  		}
   775  	}
   776  }
   777  
// TestEventualConsistency verifies that if getPodsAssignedToNode returns incomplete data
// (e.g. due to watch latency), the remaining pods are reconciled eventually.
// This scenario is partially covered by TestUpdatePod, but given this is an important
// property of TaintManager, it's better to have an explicit test for this.
   782  func TestEventualConsistency(t *testing.T) {
   783  	testCases := []struct {
   784  		description  string
   785  		pods         []corev1.Pod
   786  		prevPod      *corev1.Pod
   787  		newPod       *corev1.Pod
   788  		oldNode      *corev1.Node
   789  		newNode      *corev1.Node
   790  		expectPatch  bool
   791  		expectDelete bool
   792  	}{
   793  		{
   794  			description: "existing pod2 scheduled onto tainted Node",
   795  			pods: []corev1.Pod{
   796  				*testutil.NewPod("pod1", "node1"),
   797  			},
   798  			prevPod:      testutil.NewPod("pod2", ""),
   799  			newPod:       testutil.NewPod("pod2", "node1"),
   800  			oldNode:      testutil.NewNode("node1"),
   801  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   802  			expectPatch:  true,
   803  			expectDelete: true,
   804  		},
   805  		{
   806  			description: "existing pod2 with taint toleration scheduled onto tainted Node",
   807  			pods: []corev1.Pod{
   808  				*testutil.NewPod("pod1", "node1"),
   809  			},
   810  			prevPod:      addToleration(testutil.NewPod("pod2", ""), 1, 100),
   811  			newPod:       addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
   812  			oldNode:      testutil.NewNode("node1"),
   813  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   814  			expectPatch:  true,
   815  			expectDelete: true,
   816  		},
   817  		{
   818  			description: "new pod2 created on tainted Node",
   819  			pods: []corev1.Pod{
   820  				*testutil.NewPod("pod1", "node1"),
   821  			},
   822  			prevPod:      nil,
   823  			newPod:       testutil.NewPod("pod2", "node1"),
   824  			oldNode:      testutil.NewNode("node1"),
   825  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   826  			expectPatch:  true,
   827  			expectDelete: true,
   828  		},
   829  		{
   830  			description: "new pod2 with tait toleration created on tainted Node",
   831  			pods: []corev1.Pod{
   832  				*testutil.NewPod("pod1", "node1"),
   833  			},
   834  			prevPod:      nil,
   835  			newPod:       addToleration(testutil.NewPod("pod2", "node1"), 1, 100),
   836  			oldNode:      testutil.NewNode("node1"),
   837  			newNode:      addTaintsToNode(testutil.NewNode("node1"), "testTaint1", "taint1", []int{1}),
   838  			expectPatch:  true,
   839  			expectDelete: true,
   840  		},
   841  	}
   842  
   843  	for _, item := range testCases {
   844  		t.Run(item.description, func(t *testing.T) {
   845  			ctx, cancel := context.WithCancel(context.Background())
   846  			defer cancel()
   847  
   848  			fakeClientset := fake.NewSimpleClientset(&corev1.PodList{Items: item.pods})
   849  			controller, podIndexer, nodeIndexer := setupNewController(ctx, fakeClientset)
   850  			nodeIndexer.Add(item.newNode)
   851  			controller.recorder = testutil.NewFakeRecorder()
   852  			go controller.Run(ctx)
   853  
   854  			if item.prevPod != nil {
   855  				podIndexer.Add(item.prevPod)
   856  				controller.PodUpdated(nil, item.prevPod)
   857  			}
   858  
   859  			// First we simulate NodeUpdate that should delete 'pod1'. It doesn't know about 'pod2' yet.
   860  			controller.NodeUpdated(item.oldNode, item.newNode)
   861  
   862  			verifyPodActions(t, item.description, fakeClientset, item.expectPatch, item.expectDelete)
   863  			fakeClientset.ClearActions()
   864  
   865  			// And now the delayed update of 'pod2' comes to the TaintManager. We should delete it as well.
   866  			podIndexer.Update(item.newPod)
   867  			controller.PodUpdated(item.prevPod, item.newPod)
   868  			// wait a bit
   869  			time.Sleep(timeForControllerToProgressForSanityCheck)
   870  		})
   871  	}
   872  }
   873  
   874  func verifyPodActions(t *testing.T, description string, fakeClientset *fake.Clientset, expectPatch, expectDelete bool) {
   875  	t.Helper()
   876  	podPatched := false
   877  	podDeleted := false
   878  	// use Poll instead of PollImmediate to give some processing time to the controller that the expected
   879  	// actions are likely to be already sent
   880  	err := wait.Poll(10*time.Millisecond, 5*time.Second, func() (bool, error) {
   881  		for _, action := range fakeClientset.Actions() {
   882  			if action.GetVerb() == "patch" && action.GetResource().Resource == "pods" {
   883  				podPatched = true
   884  			}
   885  			if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
   886  				podDeleted = true
   887  			}
   888  		}
   889  		return podPatched == expectPatch && podDeleted == expectDelete, nil
   890  	})
   891  	if err != nil {
   892  		t.Errorf("Failed waiting for the expected actions: %q", err)
   893  	}
   894  	if podPatched != expectPatch {
   895  		t.Errorf("[%v]Unexpected test result. Expected patch %v, got %v", description, expectPatch, podPatched)
   896  	}
   897  	if podDeleted != expectDelete {
   898  		t.Errorf("[%v]Unexpected test result. Expected delete %v, got %v", description, expectDelete, podDeleted)
   899  	}
   900  }
   901  
   902  // TestPodDeletionEvent Verify that the output events are as expected
   903  func TestPodDeletionEvent(t *testing.T) {
   904  	f := func(path cmp.Path) bool {
   905  		switch path.String() {
   906  		// These fields change at runtime, so ignore it
   907  		case "LastTimestamp", "FirstTimestamp", "ObjectMeta.Name":
   908  			return true
   909  		}
   910  		return false
   911  	}
   912  
   913  	t.Run("emitPodDeletionEvent", func(t *testing.T) {
   914  		controller := &Controller{}
   915  		recorder := testutil.NewFakeRecorder()
   916  		controller.recorder = recorder
   917  		controller.emitPodDeletionEvent(types.NamespacedName{
   918  			Name:      "test",
   919  			Namespace: "test",
   920  		})
   921  		want := []*corev1.Event{
   922  			{
   923  				ObjectMeta: metav1.ObjectMeta{
   924  					Namespace: "test",
   925  				},
   926  				InvolvedObject: corev1.ObjectReference{
   927  					Kind:       "Pod",
   928  					APIVersion: "v1",
   929  					Namespace:  "test",
   930  					Name:       "test",
   931  				},
   932  				Reason:  "TaintManagerEviction",
   933  				Type:    "Normal",
   934  				Count:   1,
   935  				Message: "Marking for deletion Pod test/test",
   936  				Source:  corev1.EventSource{Component: "nodeControllerTest"},
   937  			},
   938  		}
   939  		if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 {
   940  			t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff)
   941  		}
   942  	})
   943  
   944  	t.Run("emitCancelPodDeletionEvent", func(t *testing.T) {
   945  		controller := &Controller{}
   946  		recorder := testutil.NewFakeRecorder()
   947  		controller.recorder = recorder
   948  		controller.emitCancelPodDeletionEvent(types.NamespacedName{
   949  			Name:      "test",
   950  			Namespace: "test",
   951  		})
   952  		want := []*corev1.Event{
   953  			{
   954  				ObjectMeta: metav1.ObjectMeta{
   955  					Namespace: "test",
   956  				},
   957  				InvolvedObject: corev1.ObjectReference{
   958  					Kind:       "Pod",
   959  					APIVersion: "v1",
   960  					Namespace:  "test",
   961  					Name:       "test",
   962  				},
   963  				Reason:  "TaintManagerEviction",
   964  				Type:    "Normal",
   965  				Count:   1,
   966  				Message: "Cancelling deletion of Pod test/test",
   967  				Source:  corev1.EventSource{Component: "nodeControllerTest"},
   968  			},
   969  		}
   970  		if diff := cmp.Diff(want, recorder.Events, cmp.FilterPath(f, cmp.Ignore())); len(diff) > 0 {
   971  			t.Errorf("emitPodDeletionEvent() returned data (-want,+got):\n%s", diff)
   972  		}
   973  	})
   974  }