k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/scheduling/predicates.go

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scheduling
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"time"
    24  
    25  	v1 "k8s.io/api/core/v1"
    26  	nodev1 "k8s.io/api/node/v1"
    27  	"k8s.io/apimachinery/pkg/api/resource"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/apimachinery/pkg/util/intstr"
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  	"k8s.io/apimachinery/pkg/util/strategicpatch"
    33  	"k8s.io/apimachinery/pkg/util/uuid"
    34  	utilversion "k8s.io/apimachinery/pkg/util/version"
    35  	clientset "k8s.io/client-go/kubernetes"
    36  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    37  	"k8s.io/kubernetes/test/e2e/feature"
    38  	"k8s.io/kubernetes/test/e2e/framework"
    39  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    40  	e2eruntimeclass "k8s.io/kubernetes/test/e2e/framework/node/runtimeclass"
    41  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    42  	e2epv "k8s.io/kubernetes/test/e2e/framework/pv"
    43  	e2erc "k8s.io/kubernetes/test/e2e/framework/rc"
    44  	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    45  	testutils "k8s.io/kubernetes/test/utils"
    46  	imageutils "k8s.io/kubernetes/test/utils/image"
    47  	admissionapi "k8s.io/pod-security-admission/api"
    48  
    49  	"github.com/onsi/ginkgo/v2"
    50  	"github.com/onsi/gomega"
    51  
    52  	// ensure libs have a chance to initialize
    53  	_ "github.com/stretchr/testify/assert"
    54  )
    55  
    56  const (
    57  	maxNumberOfPods int64 = 10
    58  	defaultTimeout        = 3 * time.Minute
    59  )
    60  
    61  var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
    62  
    63  // variable populated in BeforeEach, never modified afterwards
    64  var workerNodes = sets.Set[string]{}
    65  
    66  type pausePodConfig struct {
    67  	Name                              string
    68  	Namespace                         string
    69  	Finalizers                        []string
    70  	Affinity                          *v1.Affinity
    71  	Annotations, Labels, NodeSelector map[string]string
    72  	Resources                         *v1.ResourceRequirements
    73  	RuntimeClassHandler               *string
    74  	Tolerations                       []v1.Toleration
    75  	NodeName                          string
    76  	Ports                             []v1.ContainerPort
    77  	OwnerReferences                   []metav1.OwnerReference
    78  	PriorityClassName                 string
    79  	DeletionGracePeriodSeconds        *int64
    80  	TopologySpreadConstraints         []v1.TopologySpreadConstraint
    81  	SchedulingGates                   []v1.PodSchedulingGate
    82  }
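        // For orientation, a typical pausePodConfig used throughout this file looks
        // roughly like the following (a sketch with hypothetical values; see
        // initPausePod below for how each field is mapped onto a v1.Pod):
        //
        //	conf := pausePodConfig{
        //		Name:   "filler-pod",
        //		Labels: map[string]string{"name": "filler"},
        //		Resources: &v1.ResourceRequirements{
        //			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")},
        //			Limits:   v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")},
        //		},
        //	}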
    83  
    84  var _ = SIGDescribe("SchedulerPredicates", framework.WithSerial(), func() {
    85  	var cs clientset.Interface
    86  	var nodeList *v1.NodeList
    87  	var RCName string
    88  	var ns string
    89  	f := framework.NewDefaultFramework("sched-pred")
    90  	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    91  
    92  	ginkgo.AfterEach(func(ctx context.Context) {
    93  		rc, err := cs.CoreV1().ReplicationControllers(ns).Get(ctx, RCName, metav1.GetOptions{})
    94  		if err == nil && *(rc.Spec.Replicas) != 0 {
    95  			ginkgo.By("Cleaning up the replication controller")
    96  			err := e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, ns, RCName)
    97  			framework.ExpectNoError(err)
    98  		}
    99  	})
   100  
   101  	ginkgo.BeforeEach(func(ctx context.Context) {
   102  		cs = f.ClientSet
   103  		ns = f.Namespace.Name
   104  		nodeList = &v1.NodeList{}
   105  		var err error
   106  
   107  		e2enode.AllNodesReady(ctx, cs, time.Minute)
   108  
   109  		nodeList, err = e2enode.GetReadySchedulableNodes(ctx, cs)
   110  		if err != nil {
   111  			framework.Logf("Unexpected error occurred: %v", err)
   112  		}
   113  		framework.ExpectNoErrorWithOffset(0, err)
   114  		for _, n := range nodeList.Items {
   115  			workerNodes.Insert(n.Name)
   116  		}
   117  
   118  		err = framework.CheckTestingNSDeletedExcept(ctx, cs, ns)
   119  		framework.ExpectNoError(err)
   120  
   121  		for _, node := range nodeList.Items {
   122  			framework.Logf("\nLogging pods the apiserver thinks are on node %v before test", node.Name)
   123  			printAllPodsOnNode(ctx, cs, node.Name)
   124  		}
   125  
   126  	})
   127  
   128  	// This test verifies we don't allow scheduling of pods in a way that the sum of local ephemeral storage resource requests of pods is greater than the machine's capacity.
   129  	// It assumes that cluster add-on pods stay stable, and it cannot be run in parallel with any other test that touches Nodes or Pods.
   130  	// This is because we need precise control over what's running in the cluster.
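        	// For illustration with hypothetical numbers: if the largest node reports 100Gi of
        	// allocatable ephemeral storage, each filler pod below requests 100Gi/10 = 10Gi
        	// (nodeMaxAllocatable / maxNumberOfPods); a node with 37Gi still unrequested receives
        	// floor(37Gi / 10Gi) = 3 filler pods, and the final "additional-pod" requesting another
        	// 10Gi cannot fit on any node and is expected to stay Pending.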
   131  	f.It("validates local ephemeral storage resource limits of pods that are allowed to run", feature.LocalStorageCapacityIsolation, func(ctx context.Context) {
   132  
   133  		e2eskipper.SkipUnlessServerVersionGTE(localStorageVersion, f.ClientSet.Discovery())
   134  
   135  		nodeMaxAllocatable := int64(0)
   136  
   137  		nodeToAllocatableMap := make(map[string]int64)
   138  		for _, node := range nodeList.Items {
   139  			allocatable, found := node.Status.Allocatable[v1.ResourceEphemeralStorage]
   140  			if !found {
   141  				framework.Failf("node.Status.Allocatable %v does not contain entry %v", node.Status.Allocatable, v1.ResourceEphemeralStorage)
   142  			}
   143  			nodeToAllocatableMap[node.Name] = allocatable.Value()
   144  			if nodeMaxAllocatable < allocatable.Value() {
   145  				nodeMaxAllocatable = allocatable.Value()
   146  			}
   147  		}
   148  		WaitForStableCluster(cs, workerNodes)
   149  
   150  		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
   151  		framework.ExpectNoError(err)
   152  		for _, pod := range pods.Items {
   153  			_, found := nodeToAllocatableMap[pod.Spec.NodeName]
   154  			if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
   155  				framework.Logf("Pod %v requesting local ephemeral resource =%v on Node %v", pod.Name, getRequestedStorageEphemeralStorage(pod), pod.Spec.NodeName)
   156  				nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedStorageEphemeralStorage(pod)
   157  			}
   158  		}
   159  
   160  		var podsNeededForSaturation int
   161  		ephemeralStoragePerPod := nodeMaxAllocatable / maxNumberOfPods
   162  
   163  		framework.Logf("Using pod capacity: %v", ephemeralStoragePerPod)
   164  		for name, leftAllocatable := range nodeToAllocatableMap {
   165  			framework.Logf("Node: %v has local ephemeral resource allocatable: %v", name, leftAllocatable)
   166  			podsNeededForSaturation += (int)(leftAllocatable / ephemeralStoragePerPod)
   167  		}
   168  
   169  		ginkgo.By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster local ephemeral resource and trying to start another one", podsNeededForSaturation))
   170  
   171  		// As the pods are distributed randomly among nodes,
   172  		// it can easily happen that all nodes are saturated
   173  		// and there is no need to create additional pods.
   174  		// StartPods requires at least one pod to replicate.
   175  		if podsNeededForSaturation > 0 {
   176  			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
   177  				*initPausePod(f, pausePodConfig{
   178  					Name:   "",
   179  					Labels: map[string]string{"name": ""},
   180  					Resources: &v1.ResourceRequirements{
   181  						Limits: v1.ResourceList{
   182  							v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   183  						},
   184  						Requests: v1.ResourceList{
   185  							v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   186  						},
   187  					},
   188  				}), true, framework.Logf))
   189  		}
   190  		podName := "additional-pod"
   191  		conf := pausePodConfig{
   192  			Name:   podName,
   193  			Labels: map[string]string{"name": "additional"},
   194  			Resources: &v1.ResourceRequirements{
   195  				Limits: v1.ResourceList{
   196  					v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   197  				},
   198  				Requests: v1.ResourceList{
   199  					v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStoragePerPod, "DecimalSI"),
   200  				},
   201  			},
   202  		}
   203  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   204  		verifyResult(ctx, cs, podsNeededForSaturation, 1, ns)
   205  	})
   206  
   207  	// This test verifies we don't allow scheduling of pods in a way that sum of limits +
   208  	// associated overhead is greater than machine's capacity.
   209  	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
   210  	// with any other test that touches Nodes or Pods.
   211  	// Because of this we need precise control over what's running in the cluster.
   212  	// Test scenario:
   213  	// 1. Find the first ready node on the system, and add a fake resource for the test.
   214  	// 2. Create one pod with affinity to that particular node that uses 70% of the fake resource.
   215  	// 3. Wait for the pod to be scheduled.
   216  	// 4. Create another pod with affinity to the particular node that needs 20% of the fake resource and
   217  	//    an overhead set as 25% of the fake resource.
   218  	// 5. Make sure this additional pod is not scheduled.
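        	//
        	// In concrete numbers (taken from the test bodies below): the node gets 1000
        	// example.com/beardsecond of capacity, the filler pod requests 700, and the
        	// second pod requests 200 plus a RuntimeClass overhead of 250. The scheduler
        	// therefore sees 700 + 200 + 250 = 1150 > 1000 and must leave the second pod Pending.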
   219  
   220  	ginkgo.Context("validates pod overhead is considered along with resource limits of pods that are allowed to run", func() {
   221  		var testNodeName string
   222  		var handler string
   223  		var beardsecond v1.ResourceName = "example.com/beardsecond"
   224  
   225  		ginkgo.BeforeEach(func(ctx context.Context) {
   226  			WaitForStableCluster(cs, workerNodes)
   227  			ginkgo.By("Add RuntimeClass and fake resource")
   228  
   229  			// find a node which can run a pod:
   230  			testNodeName = GetNodeThatCanRunPod(ctx, f)
   231  
   232  			// Get node object:
   233  			node, err := cs.CoreV1().Nodes().Get(ctx, testNodeName, metav1.GetOptions{})
   234  			framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName)
   235  
   236  			// update Node API object with a fake resource
   237  			nodeCopy := node.DeepCopy()
   238  			nodeCopy.ResourceVersion = "0"
   239  
   240  			nodeCopy.Status.Capacity[beardsecond] = resource.MustParse("1000")
   241  			_, err = cs.CoreV1().Nodes().UpdateStatus(ctx, nodeCopy, metav1.UpdateOptions{})
   242  			framework.ExpectNoError(err, "unable to apply fake resource to %v", testNodeName)
   243  
   244  			// Register a runtimeClass with overhead set as 25% of the available beard-seconds
   245  			handler = e2eruntimeclass.PreconfiguredRuntimeClassHandler
   246  
   247  			rc := &nodev1.RuntimeClass{
   248  				ObjectMeta: metav1.ObjectMeta{Name: handler},
   249  				Handler:    handler,
   250  				Overhead: &nodev1.Overhead{
   251  					PodFixed: v1.ResourceList{
   252  						beardsecond: resource.MustParse("250"),
   253  					},
   254  				},
   255  			}
   256  			_, err = cs.NodeV1().RuntimeClasses().Create(ctx, rc, metav1.CreateOptions{})
   257  			framework.ExpectNoError(err, "failed to create RuntimeClass resource")
   258  		})
   259  
   260  		ginkgo.AfterEach(func(ctx context.Context) {
   261  			ginkgo.By("Remove fake resource and RuntimeClass")
   262  			// remove fake resource:
   263  			if testNodeName != "" {
   264  				// Get node object:
   265  				node, err := cs.CoreV1().Nodes().Get(ctx, testNodeName, metav1.GetOptions{})
   266  				framework.ExpectNoError(err, "unable to get node object for node %v", testNodeName)
   267  
   268  				nodeCopy := node.DeepCopy()
   269  				// force it to update
   270  				nodeCopy.ResourceVersion = "0"
   271  				delete(nodeCopy.Status.Capacity, beardsecond)
   272  				_, err = cs.CoreV1().Nodes().UpdateStatus(ctx, nodeCopy, metav1.UpdateOptions{})
   273  				framework.ExpectNoError(err, "unable to update node %v", testNodeName)
   274  			}
   275  
   276  			// remove RuntimeClass
   277  			_ = cs.NodeV1().RuntimeClasses().Delete(ctx, e2eruntimeclass.PreconfiguredRuntimeClassHandler, metav1.DeleteOptions{})
   278  		})
   279  
   280  		ginkgo.It("verify pod overhead is accounted for", func(ctx context.Context) {
   281  			if testNodeName == "" {
   282  				framework.Fail("unable to find a node which can run a pod")
   283  			}
   284  
   285  			ginkgo.By("Starting Pod to consume most of the node's resource.")
   286  
   287  			// Create pod which requires 70% of the available beard-seconds.
   288  			fillerPod := createPausePod(ctx, f, pausePodConfig{
   289  				Name: "filler-pod-" + string(uuid.NewUUID()),
   290  				Resources: &v1.ResourceRequirements{
   291  					Requests: v1.ResourceList{beardsecond: resource.MustParse("700")},
   292  					Limits:   v1.ResourceList{beardsecond: resource.MustParse("700")},
   293  				},
   294  			})
   295  
   296  			// Wait for filler pod to schedule.
   297  			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, fillerPod))
   298  
   299  			ginkgo.By("Creating another pod that requires unavailable amount of resources.")
   300  			// Create another pod that requires 20% of available beard-seconds, but utilizes the RuntimeClass
   301  			// which defines a pod overhead that requires an additional 25%.
   302  			// This pod should remain pending because at least 70% of the beard-seconds on
   303  			// the node are already consumed.
   304  			podName := "additional-pod" + string(uuid.NewUUID())
   305  			conf := pausePodConfig{
   306  				RuntimeClassHandler: &handler,
   307  				Name:                podName,
   308  				Labels:              map[string]string{"name": "additional"},
   309  				Resources: &v1.ResourceRequirements{
   310  					Limits: v1.ResourceList{beardsecond: resource.MustParse("200")},
   311  				},
   312  			}
   313  
   314  			WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   315  			verifyResult(ctx, cs, 1, 1, ns)
   316  		})
   317  	})
   318  
   319  	// This test verifies we don't allow scheduling of pods in a way that the sum of
   320  	// resource requests of pods is greater than the machine's capacity.
   321  	// It assumes that cluster add-on pods stay stable, and it cannot be run in parallel
   322  	// with any other test that touches Nodes or Pods.
   323  	// This is because we need precise control over what's running in the cluster.
   324  	// Test scenario:
   325  	// 1. Find the amount of allocatable CPU resources on each node.
   326  	// 2. Create one pod with affinity to each node that uses 70% of the node's CPU.
   327  	// 3. Wait for the pods to be scheduled.
   328  	// 4. Create another pod with no affinity to any node that needs 50% of the largest node's CPU.
   329  	// 5. Make sure this additional pod is not scheduled.
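        	//
        	// For illustration with hypothetical numbers: on a node with 4000m allocatable CPU of
        	// which 400m is already requested, the filler pod asks for (4000m-400m)*7/10 = 2520m,
        	// leaving roughly 1080m free; the final pod then asks for 4000m*5/10 = 2000m, which no
        	// node can satisfy, so it is expected to stay Pending.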
   330  	/*
   331  		Release: v1.9
   332  		Testname: Scheduler, resource limits
   333  		Description: Scheduling Pods MUST fail if the resource requests exceed Machine capacity.
   334  	*/
   335  	framework.ConformanceIt("validates resource limits of pods that are allowed to run", func(ctx context.Context) {
   336  		WaitForStableCluster(cs, workerNodes)
   337  		nodeMaxAllocatable := int64(0)
   338  		nodeToAllocatableMap := make(map[string]int64)
   339  		for _, node := range nodeList.Items {
   340  			nodeReady := false
   341  			for _, condition := range node.Status.Conditions {
   342  				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
   343  					nodeReady = true
   344  					break
   345  				}
   346  			}
   347  			if !nodeReady {
   348  				continue
   349  			}
   350  			// Apply node label to each node
   351  			e2enode.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
   352  			e2enode.ExpectNodeHasLabel(ctx, cs, node.Name, "node", node.Name)
   353  			// Find allocatable amount of CPU.
   354  			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
   355  			if !found {
   356  				framework.Failf("node.Status.Allocatable %v does not contain entry %v", node.Status.Allocatable, v1.ResourceCPU)
   357  			}
   358  			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
   359  			if nodeMaxAllocatable < allocatable.MilliValue() {
   360  				nodeMaxAllocatable = allocatable.MilliValue()
   361  			}
   362  		}
   363  		// Clean up added labels after this test.
   364  		defer func() {
   365  			for nodeName := range nodeToAllocatableMap {
   366  				e2enode.RemoveLabelOffNode(cs, nodeName, "node")
   367  			}
   368  		}()
   369  
   370  		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
   371  		framework.ExpectNoError(err)
   372  		for _, pod := range pods.Items {
   373  			_, found := nodeToAllocatableMap[pod.Spec.NodeName]
   374  			if found && pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
   375  				framework.Logf("Pod %v requesting resource cpu=%vm on Node %v", pod.Name, getRequestedCPU(pod), pod.Spec.NodeName)
   376  				nodeToAllocatableMap[pod.Spec.NodeName] -= getRequestedCPU(pod)
   377  			}
   378  		}
   379  
   380  		ginkgo.By("Starting Pods to consume most of the cluster CPU.")
   381  		// Create one pod per node that requires 70% of the node's remaining CPU.
   382  		fillerPods := []*v1.Pod{}
   383  		for nodeName, cpu := range nodeToAllocatableMap {
   384  			requestedCPU := cpu * 7 / 10
   385  			framework.Logf("Creating a pod which consumes cpu=%vm on Node %v", requestedCPU, nodeName)
   386  			fillerPods = append(fillerPods, createPausePod(ctx, f, pausePodConfig{
   387  				Name: "filler-pod-" + string(uuid.NewUUID()),
   388  				Resources: &v1.ResourceRequirements{
   389  					Limits: v1.ResourceList{
   390  						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
   391  					},
   392  					Requests: v1.ResourceList{
   393  						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
   394  					},
   395  				},
   396  				Affinity: &v1.Affinity{
   397  					NodeAffinity: &v1.NodeAffinity{
   398  						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   399  							NodeSelectorTerms: []v1.NodeSelectorTerm{
   400  								{
   401  									MatchExpressions: []v1.NodeSelectorRequirement{
   402  										{
   403  											Key:      "node",
   404  											Operator: v1.NodeSelectorOpIn,
   405  											Values:   []string{nodeName},
   406  										},
   407  									},
   408  								},
   409  							},
   410  						},
   411  					},
   412  				},
   413  			}))
   414  		}
   415  		// Wait for filler pods to schedule.
   416  		for _, pod := range fillerPods {
   417  			framework.ExpectNoError(e2epod.WaitForPodRunningInNamespace(ctx, cs, pod))
   418  		}
   419  		ginkgo.By("Creating another pod that requires unavailable amount of CPU.")
   420  		// Create another pod that requires 50% of the largest node's CPU resources.
   421  		// This pod should remain pending because at least 70% of the CPU of every node in
   422  		// the cluster is already consumed.
   423  		podName := "additional-pod"
   424  		conf := pausePodConfig{
   425  			Name:   podName,
   426  			Labels: map[string]string{"name": "additional"},
   427  			Resources: &v1.ResourceRequirements{
   428  				Limits: v1.ResourceList{
   429  					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
   430  				},
   431  				Requests: v1.ResourceList{
   432  					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
   433  				},
   434  			},
   435  		}
   436  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   437  		verifyResult(ctx, cs, len(fillerPods), 1, ns)
   438  	})
   439  
   440  	// The test nodes do not carry the requested label, hence it should be impossible to schedule a Pod
   441  	// with a nonempty NodeSelector set.
   442  	/*
   443  		Release: v1.9
   444  		Testname: Scheduler, node selector not matching
   445  		Description: Create a Pod with a NodeSelector set to a value that does not match a node in the cluster. Since there are no nodes matching the criteria the Pod MUST not be scheduled.
   446  	*/
   447  	framework.ConformanceIt("validates that NodeSelector is respected if not matching", func(ctx context.Context) {
   448  		ginkgo.By("Trying to schedule Pod with nonempty NodeSelector.")
   449  		podName := "restricted-pod"
   450  
   451  		WaitForStableCluster(cs, workerNodes)
   452  
   453  		conf := pausePodConfig{
   454  			Name:   podName,
   455  			Labels: map[string]string{"name": "restricted"},
   456  			NodeSelector: map[string]string{
   457  				"label": "nonempty",
   458  			},
   459  		}
   460  
   461  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   462  		verifyResult(ctx, cs, 0, 1, ns)
   463  	})
   464  
   465  	/*
   466  		Release: v1.9
   467  		Testname: Scheduler, node selector matching
   468  		Description: Create a label on the node {k: v}. Then create a Pod with a NodeSelector set to {k: v}. Check to see if the Pod is scheduled. When the NodeSelector matches then Pod MUST be scheduled on that node.
   469  	*/
   470  	framework.ConformanceIt("validates that NodeSelector is respected if matching", func(ctx context.Context) {
   471  		nodeName := GetNodeThatCanRunPod(ctx, f)
   472  
   473  		ginkgo.By("Trying to apply a random label on the found node.")
   474  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   475  		v := "42"
   476  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   477  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   478  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   479  
   480  		ginkgo.By("Trying to relaunch the pod, now with labels.")
   481  		labelPodName := "with-labels"
   482  		createPausePod(ctx, f, pausePodConfig{
   483  			Name: labelPodName,
   484  			NodeSelector: map[string]string{
   485  				k: v,
   486  			},
   487  		})
   488  
   489  		// check that pod got scheduled. We intentionally DO NOT check that the
   490  		// pod is running because this will create a race condition with the
   491  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   492  		// already when the kubelet does not know about its new label yet. The
   493  		// kubelet will then refuse to launch the pod.
   494  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, labelPodName))
   495  		labelPod, err := cs.CoreV1().Pods(ns).Get(ctx, labelPodName, metav1.GetOptions{})
   496  		framework.ExpectNoError(err)
   497  		gomega.Expect(labelPod.Spec.NodeName).To(gomega.Equal(nodeName))
   498  	})
   499  
   500  	// The test nodes do not carry the requested labels, hence it should be impossible to schedule a Pod with
   501  	// a non-nil NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution that requires them.
   502  	ginkgo.It("validates that NodeAffinity is respected if not matching", func(ctx context.Context) {
   503  		ginkgo.By("Trying to schedule Pod with nonempty NodeSelector.")
   504  		podName := "restricted-pod"
   505  
   506  		WaitForStableCluster(cs, workerNodes)
   507  
   508  		conf := pausePodConfig{
   509  			Name: podName,
   510  			Affinity: &v1.Affinity{
   511  				NodeAffinity: &v1.NodeAffinity{
   512  					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   513  						NodeSelectorTerms: []v1.NodeSelectorTerm{
   514  							{
   515  								MatchExpressions: []v1.NodeSelectorRequirement{
   516  									{
   517  										Key:      "foo",
   518  										Operator: v1.NodeSelectorOpIn,
   519  										Values:   []string{"bar", "value2"},
   520  									},
   521  								},
   522  							}, {
   523  								MatchExpressions: []v1.NodeSelectorRequirement{
   524  									{
   525  										Key:      "diffkey",
   526  										Operator: v1.NodeSelectorOpIn,
   527  										Values:   []string{"wrong", "value2"},
   528  									},
   529  								},
   530  							},
   531  						},
   532  					},
   533  				},
   534  			},
   535  			Labels: map[string]string{"name": "restricted"},
   536  		}
   537  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podName, false)
   538  		verifyResult(ctx, cs, 0, 1, ns)
   539  	})
   540  
   541  	// Keep the same steps as the NodeSelector test,
   542  	// but specify the node affinity in Pod.Spec.Affinity instead of NodeSelector.
   543  	ginkgo.It("validates that required NodeAffinity setting is respected if matching", func(ctx context.Context) {
   544  		nodeName := GetNodeThatCanRunPod(ctx, f)
   545  
   546  		ginkgo.By("Trying to apply a random label on the found node.")
   547  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   548  		v := "42"
   549  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   550  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   551  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   552  
   553  		ginkgo.By("Trying to relaunch the pod, now with labels.")
   554  		labelPodName := "with-labels"
   555  		createPausePod(ctx, f, pausePodConfig{
   556  			Name: labelPodName,
   557  			Affinity: &v1.Affinity{
   558  				NodeAffinity: &v1.NodeAffinity{
   559  					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   560  						NodeSelectorTerms: []v1.NodeSelectorTerm{
   561  							{
   562  								MatchExpressions: []v1.NodeSelectorRequirement{
   563  									{
   564  										Key:      k,
   565  										Operator: v1.NodeSelectorOpIn,
   566  										Values:   []string{v},
   567  									},
   568  								},
   569  							},
   570  						},
   571  					},
   572  				},
   573  			},
   574  		})
   575  
   576  		// check that pod got scheduled. We intentionally DO NOT check that the
   577  		// pod is running because this will create a race condition with the
   578  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   579  		// already when the kubelet does not know about its new label yet. The
   580  		// kubelet will then refuse to launch the pod.
   581  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, labelPodName))
   582  		labelPod, err := cs.CoreV1().Pods(ns).Get(ctx, labelPodName, metav1.GetOptions{})
   583  		framework.ExpectNoError(err)
   584  		gomega.Expect(labelPod.Spec.NodeName).To(gomega.Equal(nodeName))
   585  	})
   586  
   587  	// 1. Run a pod to get an available node, then delete the pod
   588  	// 2. Taint the node with a random taint
   589  	// 3. Try to relaunch the pod with tolerations that tolerate the taint on the node,
   590  	//    and a node selector that targets the node found in step 1.
   591  	ginkgo.It("validates that taints-tolerations is respected if matching", func(ctx context.Context) {
   592  		nodeName := getNodeThatCanRunPodWithoutToleration(ctx, f)
   593  
   594  		ginkgo.By("Trying to apply a random taint on the found node.")
   595  		testTaint := v1.Taint{
   596  			Key:    fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
   597  			Value:  "testing-taint-value",
   598  			Effect: v1.TaintEffectNoSchedule,
   599  		}
   600  		e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint)
   601  		e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint)
   602  		ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint)
   603  
   604  		ginkgo.By("Trying to apply a random label on the found node.")
   605  		labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
   606  		labelValue := "testing-label-value"
   607  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
   608  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, labelKey, labelValue)
   609  		defer e2enode.RemoveLabelOffNode(cs, nodeName, labelKey)
   610  
   611  		ginkgo.By("Trying to relaunch the pod, now with tolerations.")
   612  		tolerationPodName := "with-tolerations"
   613  		createPausePod(ctx, f, pausePodConfig{
   614  			Name:         tolerationPodName,
   615  			Tolerations:  []v1.Toleration{{Key: testTaint.Key, Value: testTaint.Value, Effect: testTaint.Effect}},
   616  			NodeSelector: map[string]string{labelKey: labelValue},
   617  		})
   618  
   619  		// check that pod got scheduled. We intentionally DO NOT check that the
   620  		// pod is running because this will create a race condition with the
   621  		// kubelet and the scheduler: the scheduler might have scheduled a pod
   622  		// already when the kubelet does not know about its new taint yet. The
   623  		// kubelet will then refuse to launch the pod.
   624  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, cs, ns, tolerationPodName))
   625  		deployedPod, err := cs.CoreV1().Pods(ns).Get(ctx, tolerationPodName, metav1.GetOptions{})
   626  		framework.ExpectNoError(err)
   627  		gomega.Expect(deployedPod.Spec.NodeName).To(gomega.Equal(nodeName))
   628  	})
   629  
   630  	// 1. Run a pod to get an available node, then delete the pod
   631  	// 2. Taint the node with a random taint
   632  	// 3. Try to relaunch the pod, still with no tolerations,
   633  	//    and a node selector that targets the node found in step 1.
   634  	ginkgo.It("validates that taints-tolerations is respected if not matching", func(ctx context.Context) {
   635  		nodeName := getNodeThatCanRunPodWithoutToleration(ctx, f)
   636  
   637  		ginkgo.By("Trying to apply a random taint on the found node.")
   638  		testTaint := v1.Taint{
   639  			Key:    fmt.Sprintf("kubernetes.io/e2e-taint-key-%s", string(uuid.NewUUID())),
   640  			Value:  "testing-taint-value",
   641  			Effect: v1.TaintEffectNoSchedule,
   642  		}
   643  		e2enode.AddOrUpdateTaintOnNode(ctx, cs, nodeName, testTaint)
   644  		e2enode.ExpectNodeHasTaint(ctx, cs, nodeName, &testTaint)
   645  		ginkgo.DeferCleanup(e2enode.RemoveTaintOffNode, cs, nodeName, testTaint)
   646  
   647  		ginkgo.By("Trying to apply a random label on the found node.")
   648  		labelKey := fmt.Sprintf("kubernetes.io/e2e-label-key-%s", string(uuid.NewUUID()))
   649  		labelValue := "testing-label-value"
   650  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, labelKey, labelValue)
   651  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, labelKey, labelValue)
   652  		defer e2enode.RemoveLabelOffNode(cs, nodeName, labelKey)
   653  
   654  		ginkgo.By("Trying to relaunch the pod, still no tolerations.")
   655  		podNameNoTolerations := "still-no-tolerations"
   656  		conf := pausePodConfig{
   657  			Name:         podNameNoTolerations,
   658  			NodeSelector: map[string]string{labelKey: labelValue},
   659  		}
   660  
   661  		WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, podNameNoTolerations, false)
   662  		verifyResult(ctx, cs, 0, 1, ns)
   663  
   664  		ginkgo.By("Removing taint off the node")
   665  		WaitForSchedulerAfterAction(ctx, f, removeTaintFromNodeAction(cs, nodeName, testTaint), ns, podNameNoTolerations, true)
   666  		verifyResult(ctx, cs, 1, 0, ns)
   667  	})
   668  
   669  	ginkgo.It("validates that there is no conflict between pods with same hostPort but different hostIP and protocol", func(ctx context.Context) {
   670  
   671  		nodeName := GetNodeThatCanRunPod(ctx, f)
   672  		localhost := "127.0.0.1"
   673  		if framework.TestContext.ClusterIsIPv6() {
   674  			localhost = "::1"
   675  		}
   676  		hostIP := getNodeHostIP(ctx, f, nodeName)
   677  
   678  		// use a nodeSelector to make sure the testing pods get assigned to the same node, to explicitly verify whether a conflict exists or not
   679  		ginkgo.By("Trying to apply a random label on the found node.")
   680  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   681  		v := "90"
   682  
   683  		nodeSelector := make(map[string]string)
   684  		nodeSelector[k] = v
   685  
   686  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   687  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   688  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   689  
   690  		port := int32(54321)
   691  		ginkgo.By(fmt.Sprintf("Trying to create a pod(pod1) with hostport %v and hostIP %s and expect scheduled", port, localhost))
   692  		createHostPortPodOnNode(ctx, f, "pod1", ns, localhost, port, v1.ProtocolTCP, nodeSelector, true)
   693  
   694  		ginkgo.By(fmt.Sprintf("Trying to create another pod(pod2) with hostport %v but hostIP %s on the node which pod1 resides and expect scheduled", port, hostIP))
   695  		createHostPortPodOnNode(ctx, f, "pod2", ns, hostIP, port, v1.ProtocolTCP, nodeSelector, true)
   696  
   697  		ginkgo.By(fmt.Sprintf("Trying to create a third pod(pod3) with hostport %v, hostIP %s but use UDP protocol on the node which pod2 resides", port, hostIP))
   698  		createHostPortPodOnNode(ctx, f, "pod3", ns, hostIP, port, v1.ProtocolUDP, nodeSelector, true)
   699  
   700  	})
   701  
   702  	/*
   703  		Release: v1.16
   704  		Testname: Scheduling, HostPort and Protocol match, HostIPs different but one is default HostIP (0.0.0.0)
   705  		Description: Pods with the same HostPort and Protocol, but different HostIPs, MUST NOT schedule to the
   706  		same node if one of those IPs is the default HostIP of 0.0.0.0, which represents all IPs on the host.
   707  	*/
   708  	framework.ConformanceIt("validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP", func(ctx context.Context) {
   709  		nodeName := GetNodeThatCanRunPod(ctx, f)
   710  		hostIP := getNodeHostIP(ctx, f, nodeName)
   711  		// use a nodeSelector to make sure the testing pods get assigned to the same node, to explicitly verify whether a conflict exists or not
   712  		ginkgo.By("Trying to apply a random label on the found node.")
   713  		k := fmt.Sprintf("kubernetes.io/e2e-%s", string(uuid.NewUUID()))
   714  		v := "95"
   715  
   716  		nodeSelector := make(map[string]string)
   717  		nodeSelector[k] = v
   718  
   719  		e2enode.AddOrUpdateLabelOnNode(cs, nodeName, k, v)
   720  		e2enode.ExpectNodeHasLabel(ctx, cs, nodeName, k, v)
   721  		defer e2enode.RemoveLabelOffNode(cs, nodeName, k)
   722  
   723  		port := int32(54322)
   724  		ginkgo.By(fmt.Sprintf("Trying to create a pod(pod4) with hostport %v and hostIP 0.0.0.0(empty string here) and expect scheduled", port))
   725  		createHostPortPodOnNode(ctx, f, "pod4", ns, "", port, v1.ProtocolTCP, nodeSelector, true)
   726  
   727  		ginkgo.By(fmt.Sprintf("Trying to create another pod(pod5) with hostport %v but hostIP %s on the node which pod4 resides and expect not scheduled", port, hostIP))
   728  		createHostPortPodOnNode(ctx, f, "pod5", ns, hostIP, port, v1.ProtocolTCP, nodeSelector, false)
   729  	})
   730  
   731  	ginkgo.Context("PodTopologySpread Filtering", func() {
   732  		var nodeNames []string
   733  		topologyKey := "kubernetes.io/e2e-pts-filter"
   734  
   735  		ginkgo.BeforeEach(func(ctx context.Context) {
   736  			if len(nodeList.Items) < 2 {
   737  				ginkgo.Skip("At least 2 nodes are required to run the test")
   738  			}
   739  			ginkgo.By("Trying to get 2 available nodes which can run pod")
   740  			nodeNames = Get2NodesThatCanRunPod(ctx, f)
   741  			ginkgo.By(fmt.Sprintf("Apply dedicated topologyKey %v for this test on the 2 nodes.", topologyKey))
   742  			for _, nodeName := range nodeNames {
   743  				e2enode.AddOrUpdateLabelOnNode(cs, nodeName, topologyKey, nodeName)
   744  			}
   745  		})
   746  		ginkgo.AfterEach(func() {
   747  			for _, nodeName := range nodeNames {
   748  				e2enode.RemoveLabelOffNode(cs, nodeName, topologyKey)
   749  			}
   750  		})
   751  
   752  		ginkgo.It("validates 4 pods with MaxSkew=1 are evenly distributed into 2 nodes", func(ctx context.Context) {
   753  			podLabel := "e2e-pts-filter"
   754  			replicas := 4
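        			// With maxSkew=1 and whenUnsatisfiable=DoNotSchedule, a node that already has
        			// one more matching pod than the other would reach a skew of 2 if it received
        			// the next replica, so that node is filtered out; the 4 replicas can therefore
        			// only land 2 and 2 across the two labeled nodes, which is what we assert below.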
   755  			rsConfig := pauseRSConfig{
   756  				Replicas: int32(replicas),
   757  				PodConfig: pausePodConfig{
   758  					Name:      podLabel,
   759  					Namespace: ns,
   760  					Labels:    map[string]string{podLabel: ""},
   761  					Affinity: &v1.Affinity{
   762  						NodeAffinity: &v1.NodeAffinity{
   763  							RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
   764  								NodeSelectorTerms: []v1.NodeSelectorTerm{
   765  									{
   766  										MatchExpressions: []v1.NodeSelectorRequirement{
   767  											{
   768  												Key:      topologyKey,
   769  												Operator: v1.NodeSelectorOpIn,
   770  												Values:   nodeNames,
   771  											},
   772  										},
   773  									},
   774  								},
   775  							},
   776  						},
   777  					},
   778  					TopologySpreadConstraints: []v1.TopologySpreadConstraint{
   779  						{
   780  							MaxSkew:           1,
   781  							TopologyKey:       topologyKey,
   782  							WhenUnsatisfiable: v1.DoNotSchedule,
   783  							LabelSelector: &metav1.LabelSelector{
   784  								MatchExpressions: []metav1.LabelSelectorRequirement{
   785  									{
   786  										Key:      podLabel,
   787  										Operator: metav1.LabelSelectorOpExists,
   788  									},
   789  								},
   790  							},
   791  						},
   792  					},
   793  				},
   794  			}
   795  			runPauseRS(ctx, f, rsConfig)
   796  			podList, err := cs.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
   797  			framework.ExpectNoError(err)
   798  			numInNode1, numInNode2 := 0, 0
   799  			for _, pod := range podList.Items {
   800  				if pod.Spec.NodeName == nodeNames[0] {
   801  					numInNode1++
   802  				} else if pod.Spec.NodeName == nodeNames[1] {
   803  					numInNode2++
   804  				}
   805  			}
   806  			expected := replicas / len(nodeNames)
   807  			gomega.Expect(numInNode1).To(gomega.Equal(expected), fmt.Sprintf("Pods are not distributed as expected on node %q", nodeNames[0]))
   808  			gomega.Expect(numInNode2).To(gomega.Equal(expected), fmt.Sprintf("Pods are not distributed as expected on node %q", nodeNames[1]))
   809  		})
   810  	})
   811  
   812  	ginkgo.It("validates Pods with non-empty schedulingGates are blocked on scheduling", func(ctx context.Context) {
   813  		podLabel := "e2e-scheduling-gates"
   814  		replicas := 3
   815  		ginkgo.By(fmt.Sprintf("Creating a ReplicaSet with replicas=%v, carrying scheduling gates [foo bar]", replicas))
   816  		rsConfig := pauseRSConfig{
   817  			Replicas: int32(replicas),
   818  			PodConfig: pausePodConfig{
   819  				Name:      podLabel,
   820  				Namespace: ns,
   821  				Labels:    map[string]string{podLabel: ""},
   822  				SchedulingGates: []v1.PodSchedulingGate{
   823  					{Name: "foo"},
   824  					{Name: "bar"},
   825  				},
   826  			},
   827  		}
   828  		createPauseRS(ctx, f, rsConfig)
   829  
   830  		ginkgo.By("Expect all pods stay in pending state")
   831  		podList, err := e2epod.WaitForNumberOfPods(ctx, cs, ns, replicas, time.Minute)
   832  		framework.ExpectNoError(err)
   833  		framework.ExpectNoError(e2epod.WaitForPodsSchedulingGated(ctx, cs, ns, replicas, time.Minute))
   834  
   835  		ginkgo.By("Remove one scheduling gate")
   836  		want := []v1.PodSchedulingGate{{Name: "bar"}}
   837  		var pods []*v1.Pod
   838  		for _, pod := range podList.Items {
   839  			clone := pod.DeepCopy()
   840  			clone.Spec.SchedulingGates = want
   841  			live, err := patchPod(cs, &pod, clone)
   842  			framework.ExpectNoError(err)
   843  			pods = append(pods, live)
   844  		}
   845  
   846  		ginkgo.By("Expect all pods carry one scheduling gate and are still in pending state")
   847  		framework.ExpectNoError(e2epod.WaitForPodsWithSchedulingGates(ctx, cs, ns, replicas, time.Minute, want))
   848  		framework.ExpectNoError(e2epod.WaitForPodsSchedulingGated(ctx, cs, ns, replicas, time.Minute))
   849  
   850  		ginkgo.By("Remove the remaining scheduling gates")
   851  		for _, pod := range pods {
   852  			clone := pod.DeepCopy()
   853  			clone.Spec.SchedulingGates = nil
   854  			_, err := patchPod(cs, pod, clone)
   855  			framework.ExpectNoError(err)
   856  		}
   857  
   858  		ginkgo.By("Expect all pods are scheduled and running")
   859  		framework.ExpectNoError(e2epod.WaitForPodsRunning(ctx, cs, ns, replicas, time.Minute))
   860  	})
   861  
   862  	// Regression test for an extended scenario for https://issues.k8s.io/123465
   863  	ginkgo.It("when PVC has node-affinity to non-existent/illegal nodes, the pod should be scheduled normally if suitable nodes exist", func(ctx context.Context) {
   864  		nodeName := GetNodeThatCanRunPod(ctx, f)
   865  		nonExistentNodeName1 := string(uuid.NewUUID())
   866  		nonExistentNodeName2 := string(uuid.NewUUID())
   867  		hostLabel := "kubernetes.io/hostname"
   868  		localPath := "/tmp"
   869  		podName := "bind-pv-with-non-existent-nodes"
   870  		pvcName := "pvc-" + string(uuid.NewUUID())
   871  		_, pvc, err := e2epv.CreatePVPVC(ctx, cs, f.Timeouts, e2epv.PersistentVolumeConfig{
   872  			PVSource: v1.PersistentVolumeSource{
   873  				Local: &v1.LocalVolumeSource{
   874  					Path: localPath,
   875  				},
   876  			},
   877  			Prebind: &v1.PersistentVolumeClaim{
   878  				ObjectMeta: metav1.ObjectMeta{Name: pvcName, Namespace: ns},
   879  			},
   880  			NodeAffinity: &v1.VolumeNodeAffinity{
   881  				Required: &v1.NodeSelector{
   882  					NodeSelectorTerms: []v1.NodeSelectorTerm{
   883  						{
   884  							MatchExpressions: []v1.NodeSelectorRequirement{
   885  								{
   886  									Key:      hostLabel,
   887  									Operator: v1.NodeSelectorOpIn,
   888  									// add non-existent nodes to the list
   889  									Values: []string{nodeName, nonExistentNodeName1, nonExistentNodeName2},
   890  								},
   891  							},
   892  						},
   893  					},
   894  				},
   895  			},
   896  		}, e2epv.PersistentVolumeClaimConfig{
   897  			Name: pvcName,
   898  		}, ns, true)
   899  		framework.ExpectNoError(err)
   900  		bindPvPod := &v1.Pod{
   901  			ObjectMeta: metav1.ObjectMeta{
   902  				Name: podName,
   903  			},
   904  			Spec: v1.PodSpec{
   905  				Containers: []v1.Container{
   906  					{
   907  						Name:  "pause",
   908  						Image: imageutils.GetE2EImage(imageutils.Pause),
   909  						VolumeMounts: []v1.VolumeMount{
   910  							{
   911  								Name:      "data",
   912  								MountPath: "/tmp",
   913  							},
   914  						},
   915  					},
   916  				},
   917  				Volumes: []v1.Volume{
   918  					{
   919  						Name: "data",
   920  						VolumeSource: v1.VolumeSource{
   921  							PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{
   922  								ClaimName: pvc.Name,
   923  							},
   924  						},
   925  					},
   926  				},
   927  			},
   928  		}
   929  		_, err = f.ClientSet.CoreV1().Pods(ns).Create(ctx, bindPvPod, metav1.CreateOptions{})
   930  		framework.ExpectNoError(err)
   931  		framework.ExpectNoError(e2epod.WaitForPodNotPending(ctx, f.ClientSet, ns, podName))
   932  	})
   933  })
   934  
   935  func patchPod(cs clientset.Interface, old, new *v1.Pod) (*v1.Pod, error) {
   936  	oldData, err := json.Marshal(old)
   937  	if err != nil {
   938  		return nil, err
   939  	}
   940  
   941  	newData, err := json.Marshal(new)
   942  	if err != nil {
   943  		return nil, err
   944  	}
   945  	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Pod{})
   946  	if err != nil {
   947  		return nil, fmt.Errorf("failed to create merge patch for Pod %q: %w", old.Name, err)
   948  	}
   949  	return cs.CoreV1().Pods(new.Namespace).Patch(context.TODO(), new.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
   950  }
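
        // The schedulingGates test above uses patchPod roughly like this (a sketch;
        // "pod" is a *v1.Pod previously fetched from the API server and cs is a
        // clientset.Interface):
        //
        //	clone := pod.DeepCopy()
        //	clone.Spec.SchedulingGates = nil // drop all gates so the pod becomes schedulable
        //	updated, err := patchPod(cs, pod, clone)
        //
        // Because the strategic merge patch is computed from the diff between the old
        // and new objects, only the changed fields are sent to the API server.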
   951  
   952  // printAllPodsOnNode outputs status of all kubelet pods into log.
   953  func printAllPodsOnNode(ctx context.Context, c clientset.Interface, nodeName string) {
   954  	podList, err := c.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{FieldSelector: "spec.nodeName=" + nodeName})
   955  	if err != nil {
   956  		framework.Logf("Unable to retrieve pods for node %v: %v", nodeName, err)
   957  		return
   958  	}
   959  	for _, p := range podList.Items {
   960  		framework.Logf("%v from %v started at %v (%d container statuses recorded)", p.Name, p.Namespace, p.Status.StartTime, len(p.Status.ContainerStatuses))
   961  		for _, c := range p.Status.ContainerStatuses {
   962  			framework.Logf("\tContainer %v ready: %v, restart count %v",
   963  				c.Name, c.Ready, c.RestartCount)
   964  		}
   965  	}
   966  }
   967  
   968  func initPausePod(f *framework.Framework, conf pausePodConfig) *v1.Pod {
   969  	var gracePeriod = int64(1)
   970  	pod := &v1.Pod{
   971  		ObjectMeta: metav1.ObjectMeta{
   972  			Name:            conf.Name,
   973  			Namespace:       conf.Namespace,
   974  			Labels:          map[string]string{},
   975  			Annotations:     map[string]string{},
   976  			OwnerReferences: conf.OwnerReferences,
   977  			Finalizers:      conf.Finalizers,
   978  		},
   979  		Spec: v1.PodSpec{
   980  			SecurityContext:           e2epod.GetRestrictedPodSecurityContext(),
   981  			NodeSelector:              conf.NodeSelector,
   982  			Affinity:                  conf.Affinity,
   983  			TopologySpreadConstraints: conf.TopologySpreadConstraints,
   984  			RuntimeClassName:          conf.RuntimeClassHandler,
   985  			Containers: []v1.Container{
   986  				{
   987  					Name:            conf.Name,
   988  					Image:           imageutils.GetPauseImageName(),
   989  					Ports:           conf.Ports,
   990  					SecurityContext: e2epod.GetRestrictedContainerSecurityContext(),
   991  				},
   992  			},
   993  			Tolerations:                   conf.Tolerations,
   994  			PriorityClassName:             conf.PriorityClassName,
   995  			TerminationGracePeriodSeconds: &gracePeriod,
   996  			SchedulingGates:               conf.SchedulingGates,
   997  		},
   998  	}
   999  	for key, value := range conf.Labels {
  1000  		pod.ObjectMeta.Labels[key] = value
  1001  	}
  1002  	for key, value := range conf.Annotations {
  1003  		pod.ObjectMeta.Annotations[key] = value
  1004  	}
  1005  	// TODO: setting the Pod's nodeAffinity instead of setting .spec.nodeName works around the
  1006  	// Preemption e2e flake (#88441), but we should investigate deeper to get to the bottom of it.
  1007  	if len(conf.NodeName) != 0 {
  1008  		e2epod.SetNodeAffinity(&pod.Spec, conf.NodeName)
  1009  	}
  1010  	if conf.Resources != nil {
  1011  		pod.Spec.Containers[0].Resources = *conf.Resources
  1012  	}
  1013  	if conf.DeletionGracePeriodSeconds != nil {
  1014  		pod.ObjectMeta.DeletionGracePeriodSeconds = conf.DeletionGracePeriodSeconds
  1015  	}
  1016  	return pod
  1017  }
  1018  
  1019  func createPausePod(ctx context.Context, f *framework.Framework, conf pausePodConfig) *v1.Pod {
  1020  	namespace := conf.Namespace
  1021  	if len(namespace) == 0 {
  1022  		namespace = f.Namespace.Name
  1023  	}
  1024  	pod, err := f.ClientSet.CoreV1().Pods(namespace).Create(ctx, initPausePod(f, conf), metav1.CreateOptions{})
  1025  	framework.ExpectNoError(err)
  1026  	return pod
  1027  }
  1028  
  1029  func runPausePod(ctx context.Context, f *framework.Framework, conf pausePodConfig) *v1.Pod {
  1030  	pod := createPausePod(ctx, f, conf)
  1031  	framework.ExpectNoError(e2epod.WaitTimeoutForPodRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodStartShort))
  1032  	pod, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Get(ctx, conf.Name, metav1.GetOptions{})
  1033  	framework.ExpectNoError(err)
  1034  	return pod
  1035  }
  1036  
  1037  func runPodAndGetNodeName(ctx context.Context, f *framework.Framework, conf pausePodConfig) string {
  1038  	// launch a pod to find a node which can launch a pod. We intentionally do
  1039  	// not just take the node list and choose the first of them. Depending on the
  1040  	// cluster and the scheduler it might be that a "normal" pod cannot be
  1041  	// scheduled onto it.
  1042  	pod := runPausePod(ctx, f, conf)
  1043  
  1044  	ginkgo.By("Explicitly delete pod here to free the resource it takes.")
  1045  	err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0))
  1046  	framework.ExpectNoError(err)
  1047  
  1048  	return pod.Spec.NodeName
  1049  }
  1050  
  1051  func getRequestedCPU(pod v1.Pod) int64 {
  1052  	var result int64
  1053  	for _, container := range pod.Spec.Containers {
  1054  		result += container.Resources.Requests.Cpu().MilliValue()
  1055  	}
  1056  	return result
  1057  }
  1058  
  1059  func getRequestedStorageEphemeralStorage(pod v1.Pod) int64 {
  1060  	var result int64
  1061  	for _, container := range pod.Spec.Containers {
  1062  		result += container.Resources.Requests.StorageEphemeral().Value()
  1063  	}
  1064  	return result
  1065  }
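
        // The pod-overhead test above assumes the scheduler adds the RuntimeClass
        // overhead on top of the container requests when checking node fit. The helper
        // below is an illustrative sketch of that accounting for an arbitrary resource;
        // it is not called by the tests, and the name is ours rather than part of the
        // e2e framework.
        func getRequestedResourceWithOverhead(pod v1.Pod, resourceName v1.ResourceName) int64 {
        	var result int64
        	for _, container := range pod.Spec.Containers {
        		if quantity, ok := container.Resources.Requests[resourceName]; ok {
        			result += quantity.Value()
        		}
        	}
        	// Pod overhead is populated from the RuntimeClass by the RuntimeClass
        	// admission controller and counts against the node's allocatable as well.
        	if quantity, ok := pod.Spec.Overhead[resourceName]; ok {
        		result += quantity.Value()
        	}
        	return result
        }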
  1066  
  1067  // removeTaintFromNodeAction returns a closure that removes the given taint
  1068  // from the given node upon invocation.
  1069  func removeTaintFromNodeAction(cs clientset.Interface, nodeName string, testTaint v1.Taint) Action {
  1070  	return func(ctx context.Context) error {
  1071  		e2enode.RemoveTaintOffNode(ctx, cs, nodeName, testTaint)
  1072  		return nil
  1073  	}
  1074  }
  1075  
  1076  // createPausePodAction returns a closure that creates a pause pod upon invocation.
  1077  func createPausePodAction(f *framework.Framework, conf pausePodConfig) Action {
  1078  	return func(ctx context.Context) error {
  1079  		_, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(ctx, initPausePod(f, conf), metav1.CreateOptions{})
  1080  		return err
  1081  	}
  1082  }
  1083  
  1084  // WaitForSchedulerAfterAction performs the provided action and then waits for
  1085  // scheduler to act on the given pod.
  1086  func WaitForSchedulerAfterAction(ctx context.Context, f *framework.Framework, action Action, ns, podName string, expectSuccess bool) {
  1087  	predicate := scheduleFailureEvent(podName)
  1088  	if expectSuccess {
  1089  		predicate = scheduleSuccessEvent(ns, podName, "" /* any node */)
  1090  	}
  1091  	observed, err := observeEventAfterAction(ctx, f.ClientSet, f.Namespace.Name, predicate, action)
  1092  	framework.ExpectNoError(err)
  1093  	if expectSuccess && !observed {
  1094  		framework.Failf("Did not observe success event after performing the supplied action for pod %v", podName)
  1095  	}
  1096  	if !expectSuccess && !observed {
  1097  		framework.Failf("Did not observe failed event after performing the supplied action for pod %v", podName)
  1098  	}
  1099  }
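
        // The predicate tests above typically pair WaitForSchedulerAfterAction with
        // verifyResult, roughly like this (a sketch; conf describes a pod expected not
        // to fit on any node):
        //
        //	WaitForSchedulerAfterAction(ctx, f, createPausePodAction(f, conf), ns, conf.Name, false)
        //	verifyResult(ctx, cs, expectedScheduled, 1, ns)
        //
        // i.e. first wait for a scheduling-failure event for the pod, then assert how
        // many pods in the namespace ended up scheduled versus unscheduled.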
  1100  
  1101  // TODO: upgrade calls in PodAffinity tests when we're able to run them
  1102  func verifyResult(ctx context.Context, c clientset.Interface, expectedScheduled int, expectedNotScheduled int, ns string) {
  1103  	allPods, err := c.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{})
  1104  	framework.ExpectNoError(err)
  1105  	scheduledPods, notScheduledPods := GetPodsScheduled(workerNodes, allPods)
  1106  
  1107  	gomega.Expect(notScheduledPods).To(gomega.HaveLen(expectedNotScheduled), fmt.Sprintf("Not scheduled Pods: %#v", notScheduledPods))
  1108  	gomega.Expect(scheduledPods).To(gomega.HaveLen(expectedScheduled), fmt.Sprintf("Scheduled Pods: %#v", scheduledPods))
  1109  }
  1110  
  1111  // GetNodeThatCanRunPod launches a pod without a label to find a node which can run it.
  1112  func GetNodeThatCanRunPod(ctx context.Context, f *framework.Framework) string {
  1113  	ginkgo.By("Trying to launch a pod without a label to get a node which can launch it.")
  1114  	return runPodAndGetNodeName(ctx, f, pausePodConfig{Name: "without-label"})
  1115  }
  1116  
  1117  // Get2NodesThatCanRunPod returns the names of two different nodes that can run a pod.
  1118  func Get2NodesThatCanRunPod(ctx context.Context, f *framework.Framework) []string {
  1119  	firstNode := GetNodeThatCanRunPod(ctx, f)
  1120  	ginkgo.By("Trying to launch a pod without a label to get a node which can launch it.")
  1121  	pod := pausePodConfig{
  1122  		Name: "without-label",
  1123  		Affinity: &v1.Affinity{
  1124  			NodeAffinity: &v1.NodeAffinity{
  1125  				RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
  1126  					NodeSelectorTerms: []v1.NodeSelectorTerm{
  1127  						{
  1128  							MatchFields: []v1.NodeSelectorRequirement{
  1129  								{Key: "metadata.name", Operator: v1.NodeSelectorOpNotIn, Values: []string{firstNode}},
  1130  							},
  1131  						},
  1132  					},
  1133  				},
  1134  			},
  1135  		},
  1136  	}
  1137  	secondNode := runPodAndGetNodeName(ctx, f, pod)
  1138  	return []string{firstNode, secondNode}
  1139  }
  1140  
  1141  func getNodeThatCanRunPodWithoutToleration(ctx context.Context, f *framework.Framework) string {
  1142  	ginkgo.By("Trying to launch a pod without a toleration to get a node which can launch it.")
  1143  	return runPodAndGetNodeName(ctx, f, pausePodConfig{Name: "without-toleration"})
  1144  }
  1145  
  1146  // CreateHostPortPods creates a replication controller whose pods reserve host port 4321.
  1147  func CreateHostPortPods(ctx context.Context, f *framework.Framework, id string, replicas int, expectRunning bool) {
  1148  	ginkgo.By("Running RC which reserves host port")
  1149  	config := &testutils.RCConfig{
  1150  		Client:    f.ClientSet,
  1151  		Name:      id,
  1152  		Namespace: f.Namespace.Name,
  1153  		Timeout:   defaultTimeout,
  1154  		Image:     imageutils.GetPauseImageName(),
  1155  		Replicas:  replicas,
  1156  		HostPorts: map[string]int{"port1": 4321},
  1157  	}
  1158  	err := e2erc.RunRC(ctx, *config)
  1159  	if expectRunning {
  1160  		framework.ExpectNoError(err)
  1161  	}
  1162  }
  1163  
  1164  // CreateNodeSelectorPods creates a replication controller with host port 4321 and the given node selector.
  1165  func CreateNodeSelectorPods(ctx context.Context, f *framework.Framework, id string, replicas int, nodeSelector map[string]string, expectRunning bool) error {
  1166  	ginkgo.By("Running RC which reserves host port and defines node selector")
  1167  
  1168  	config := &testutils.RCConfig{
  1169  		Client:       f.ClientSet,
  1170  		Name:         id,
  1171  		Namespace:    f.Namespace.Name,
  1172  		Timeout:      defaultTimeout,
  1173  		Image:        imageutils.GetPauseImageName(),
  1174  		Replicas:     replicas,
  1175  		HostPorts:    map[string]int{"port1": 4321},
  1176  		NodeSelector: nodeSelector,
  1177  	}
  1178  	err := e2erc.RunRC(ctx, *config)
  1179  	if expectRunning {
  1180  		return err
  1181  	}
  1182  	return nil
  1183  }
  1184  
  1185  // createHostPortPodOnNode creates a pod that uses the given hostPort on the node selected by nodeSelector.
  1186  // It starts an HTTP server on the exposed port.
  1187  func createHostPortPodOnNode(ctx context.Context, f *framework.Framework, podName, ns, hostIP string, port int32, protocol v1.Protocol, nodeSelector map[string]string, expectScheduled bool) {
  1188  	hostPortPod := &v1.Pod{
  1189  		ObjectMeta: metav1.ObjectMeta{
  1190  			Name: podName,
  1191  		},
  1192  		Spec: v1.PodSpec{
  1193  			Containers: []v1.Container{
  1194  				{
  1195  					Name:  "agnhost",
  1196  					Image: imageutils.GetE2EImage(imageutils.Agnhost),
  1197  					Args:  []string{"netexec", "--http-port=8080", "--udp-port=8080"},
  1198  					Ports: []v1.ContainerPort{
  1199  						{
  1200  							HostPort:      port,
  1201  							ContainerPort: 8080,
  1202  							Protocol:      protocol,
  1203  							HostIP:        hostIP,
  1204  						},
  1205  					},
  1206  					ReadinessProbe: &v1.Probe{
  1207  						ProbeHandler: v1.ProbeHandler{
  1208  							HTTPGet: &v1.HTTPGetAction{
  1209  								Path: "/hostname",
  1210  								Port: intstr.IntOrString{
  1211  									IntVal: int32(8080),
  1212  								},
  1213  								Scheme: v1.URISchemeHTTP,
  1214  							},
  1215  						},
  1216  					},
  1217  				},
  1218  			},
  1219  			NodeSelector: nodeSelector,
  1220  		},
  1221  	}
  1222  	_, err := f.ClientSet.CoreV1().Pods(ns).Create(ctx, hostPortPod, metav1.CreateOptions{})
  1223  	framework.ExpectNoError(err)
  1224  
  1225  	err = e2epod.WaitForPodNotPending(ctx, f.ClientSet, ns, podName)
  1226  	if expectScheduled {
  1227  		framework.ExpectNoError(err)
  1228  	}
  1229  }
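
        // The two hostPort tests above exercise the node-ports filtering rule. The
        // helper below is a simplified, illustrative approximation of that rule and is
        // not called by the tests: two host ports conflict only when protocol and port
        // match and their host IPs overlap, where an empty or 0.0.0.0 hostIP overlaps
        // with every other hostIP (the IPv6 wildcard is ignored here for brevity).
        func hostPortsConflict(a, b v1.ContainerPort) bool {
        	if a.HostPort == 0 || a.HostPort != b.HostPort || a.Protocol != b.Protocol {
        		return false
        	}
        	isWildcard := func(ip string) bool { return ip == "" || ip == "0.0.0.0" }
        	return a.HostIP == b.HostIP || isWildcard(a.HostIP) || isWildcard(b.HostIP)
        }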
  1230  
  1231  // GetPodsScheduled returns the currently scheduled and not-yet-scheduled Pods on the worker nodes.
  1232  func GetPodsScheduled(workerNodes sets.Set[string], pods *v1.PodList) (scheduledPods, notScheduledPods []v1.Pod) {
  1233  	for _, pod := range pods.Items {
  1234  		if pod.Spec.NodeName != "" && workerNodes.Has(pod.Spec.NodeName) {
  1235  			_, scheduledCondition := podutil.GetPodCondition(&pod.Status, v1.PodScheduled)
  1236  			if scheduledCondition == nil {
  1237  				framework.Failf("Did not find 'scheduled' condition for pod %q", pod.Name)
  1238  			}
  1239  			if scheduledCondition.Status != v1.ConditionTrue {
  1240  				framework.Failf("PodScheduled condition is not 'True' for pod %q", pod.Name)
  1241  			}
  1242  			scheduledPods = append(scheduledPods, pod)
  1243  		} else if pod.Spec.NodeName == "" {
  1244  			notScheduledPods = append(notScheduledPods, pod)
  1245  		}
  1246  	}
  1247  	return
  1248  }
  1249  
  1250  // getNodeHostIP returns the first internal IP on the node matching the main Cluster IP family
  1251  func getNodeHostIP(ctx context.Context, f *framework.Framework, nodeName string) string {
  1252  	// Get the internal HostIP of the node
  1253  	family := v1.IPv4Protocol
  1254  	if framework.TestContext.ClusterIsIPv6() {
  1255  		family = v1.IPv6Protocol
  1256  	}
  1257  	node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
  1258  	framework.ExpectNoError(err)
  1259  	ips := e2enode.GetAddressesByTypeAndFamily(node, v1.NodeInternalIP, family)
  1260  	gomega.Expect(ips).ToNot(gomega.BeEmpty())
  1261  	return ips[0]
  1262  }