k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package interpodaffinity
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	v1 "k8s.io/api/core/v1"
    24  	"k8s.io/apimachinery/pkg/labels"
    25  	"k8s.io/apimachinery/pkg/runtime"
    26  	listersv1 "k8s.io/client-go/listers/core/v1"
    27  	"k8s.io/klog/v2"
    28  	"k8s.io/kubernetes/pkg/scheduler/apis/config"
    29  	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
    30  	"k8s.io/kubernetes/pkg/scheduler/framework"
    31  	"k8s.io/kubernetes/pkg/scheduler/framework/parallelize"
    32  	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
    33  	"k8s.io/kubernetes/pkg/scheduler/util"
    34  )
    35  
    36  // Name is the name of the plugin used in the plugin registry and configurations.
    37  const Name = names.InterPodAffinity
    38  
    39  var _ framework.PreFilterPlugin = &InterPodAffinity{}
    40  var _ framework.FilterPlugin = &InterPodAffinity{}
    41  var _ framework.PreScorePlugin = &InterPodAffinity{}
    42  var _ framework.ScorePlugin = &InterPodAffinity{}
    43  var _ framework.EnqueueExtensions = &InterPodAffinity{}
    44  
    45  // InterPodAffinity is a plugin that checks inter pod affinity
    46  type InterPodAffinity struct {
    47  	parallelizer parallelize.Parallelizer
    48  	args         config.InterPodAffinityArgs
    49  	sharedLister framework.SharedLister
    50  	nsLister     listersv1.NamespaceLister
    51  }
    52  
    53  // Name returns name of the plugin. It is used in logs, etc.
    54  func (pl *InterPodAffinity) Name() string {
    55  	return Name
    56  }
    57  
    58  // EventsToRegister returns the possible events that may make a failed Pod
    59  // schedulable
    60  func (pl *InterPodAffinity) EventsToRegister() []framework.ClusterEventWithHint {
    61  	return []framework.ClusterEventWithHint{
    62  		// All ActionType includes the following events:
    63  		// - Delete. An unschedulable Pod may fail due to violating an existing Pod's anti-affinity constraints,
    64  		// deleting an existing Pod may make it schedulable.
    65  		// - Update. Updating on an existing Pod's labels (e.g., removal) may make
    66  		// an unschedulable Pod schedulable.
    67  		// - Add. An unschedulable Pod may fail due to violating pod-affinity constraints,
    68  		// adding an assigned Pod may make it schedulable.
    69  		//
    70  		// A note about UpdateNodeTaint event:
    71  		// NodeAdd QueueingHint isn't always called because of the internal feature called preCheck.
    72  		// As a common problematic scenario,
    73  		// when a node is added but not ready, NodeAdd event is filtered out by preCheck and doesn't arrive.
    74  		// In such cases, this plugin may miss some events that actually make pods schedulable.
    75  		// As a workaround, we add UpdateNodeTaint event to catch the case.
    76  		// We can remove UpdateNodeTaint when we remove the preCheck feature.
    77  		// See: https://github.com/kubernetes/kubernetes/issues/110175
    78  		{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.All}, QueueingHintFn: pl.isSchedulableAfterPodChange},
    79  		{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
    80  	}
    81  }
    82  
    83  // New initializes a new plugin and returns it.
    84  func New(_ context.Context, plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
    85  	if h.SnapshotSharedLister() == nil {
    86  		return nil, fmt.Errorf("SnapshotSharedlister is nil")
    87  	}
    88  	args, err := getArgs(plArgs)
    89  	if err != nil {
    90  		return nil, err
    91  	}
    92  	if err := validation.ValidateInterPodAffinityArgs(nil, &args); err != nil {
    93  		return nil, err
    94  	}
    95  	pl := &InterPodAffinity{
    96  		parallelizer: h.Parallelizer(),
    97  		args:         args,
    98  		sharedLister: h.SnapshotSharedLister(),
    99  		nsLister:     h.SharedInformerFactory().Core().V1().Namespaces().Lister(),
   100  	}
   101  
   102  	return pl, nil
   103  }
   104  
   105  func getArgs(obj runtime.Object) (config.InterPodAffinityArgs, error) {
   106  	ptr, ok := obj.(*config.InterPodAffinityArgs)
   107  	if !ok {
   108  		return config.InterPodAffinityArgs{}, fmt.Errorf("want args to be of type InterPodAffinityArgs, got %T", obj)
   109  	}
   110  	return *ptr, nil
   111  }
   112  
   113  // Updates Namespaces with the set of namespaces identified by NamespaceSelector.
   114  // If successful, NamespaceSelector is set to nil.
   115  // The assumption is that the term is for an incoming pod, in which case
   116  // namespaceSelector is either unrolled into Namespaces (and so the selector
   117  // is set to Nothing()) or is Empty(), which means match everything. Therefore,
   118  // there when matching against this term, there is no need to lookup the existing
   119  // pod's namespace labels to match them against term's namespaceSelector explicitly.
   120  func (pl *InterPodAffinity) mergeAffinityTermNamespacesIfNotEmpty(at *framework.AffinityTerm) error {
   121  	if at.NamespaceSelector.Empty() {
   122  		return nil
   123  	}
   124  	ns, err := pl.nsLister.List(at.NamespaceSelector)
   125  	if err != nil {
   126  		return err
   127  	}
   128  	for _, n := range ns {
   129  		at.Namespaces.Insert(n.Name)
   130  	}
   131  	at.NamespaceSelector = labels.Nothing()
   132  	return nil
   133  }
   134  
   135  // GetNamespaceLabelsSnapshot returns a snapshot of the labels associated with
   136  // the namespace.
   137  func GetNamespaceLabelsSnapshot(logger klog.Logger, ns string, nsLister listersv1.NamespaceLister) (nsLabels labels.Set) {
   138  	podNS, err := nsLister.Get(ns)
   139  	if err == nil {
   140  		// Create and return snapshot of the labels.
   141  		return labels.Merge(podNS.Labels, nil)
   142  	}
   143  	logger.V(3).Info("getting namespace, assuming empty set of namespace labels", "namespace", ns, "err", err)
   144  	return
   145  }
   146  
   147  func (pl *InterPodAffinity) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
   148  	originalPod, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
   149  	if err != nil {
   150  		return framework.Queue, err
   151  	}
   152  	if (modifiedPod != nil && modifiedPod.Spec.NodeName == "") || (originalPod != nil && originalPod.Spec.NodeName == "") {
   153  		logger.V(5).Info("the added/updated/deleted pod is unscheduled, so it doesn't make the target pod schedulable",
   154  			"pod", klog.KObj(pod), "originalPod", klog.KObj(originalPod), "modifiedPod", klog.KObj(modifiedPod))
   155  		return framework.QueueSkip, nil
   156  	}
   157  
   158  	terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity))
   159  	if err != nil {
   160  		return framework.Queue, err
   161  	}
   162  
   163  	antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity))
   164  	if err != nil {
   165  		return framework.Queue, err
   166  	}
   167  
   168  	// Pod is updated. Return Queue when the updated pod matching the target pod's affinity or not matching anti-affinity.
   169  	// Note that, we don't need to check each affinity individually when the Pod has more than one affinity
   170  	// because the current PodAffinity looks for a **single** existing pod that can satisfy **all** the terms of inter-pod affinity of an incoming pod.
   171  	if modifiedPod != nil && originalPod != nil {
   172  		if !podMatchesAllAffinityTerms(terms, originalPod) && podMatchesAllAffinityTerms(terms, modifiedPod) {
   173  			logger.V(5).Info("a scheduled pod was updated to match the target pod's affinity, and the pod may be schedulable now",
   174  				"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   175  			return framework.Queue, nil
   176  		}
   177  		if podMatchesAllAffinityTerms(antiTerms, originalPod) && !podMatchesAllAffinityTerms(antiTerms, modifiedPod) {
   178  			logger.V(5).Info("a scheduled pod was updated not to match the target pod's anti affinity, and the pod may be schedulable now",
   179  				"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   180  			return framework.Queue, nil
   181  		}
   182  		logger.V(5).Info("a scheduled pod was updated but it doesn't match the target pod's affinity or does match the target pod's anti-affinity",
   183  			"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   184  		return framework.QueueSkip, nil
   185  	}
   186  
   187  	// Pod is added. Return Queue when the added pod matching the target pod's affinity.
   188  	if modifiedPod != nil {
   189  		if podMatchesAllAffinityTerms(terms, modifiedPod) {
   190  			logger.V(5).Info("a scheduled pod was added and it matches the target pod's affinity",
   191  				"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   192  			return framework.Queue, nil
   193  		}
   194  		logger.V(5).Info("a scheduled pod was added and it doesn't match the target pod's affinity",
   195  			"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   196  		return framework.QueueSkip, nil
   197  	}
   198  
   199  	// Pod is deleted. Return Queue when the deleted pod matching the target pod's anti-affinity.
   200  	if !podMatchesAllAffinityTerms(antiTerms, originalPod) {
   201  		logger.V(5).Info("a scheduled pod was deleted but it doesn't match the target pod's anti-affinity",
   202  			"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   203  		return framework.QueueSkip, nil
   204  	}
   205  	logger.V(5).Info("a scheduled pod was deleted and it matches the target pod's anti-affinity. The pod may be schedulable now",
   206  		"pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod))
   207  	return framework.Queue, nil
   208  }
   209  
   210  func (pl *InterPodAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
   211  	_, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
   212  	if err != nil {
   213  		return framework.Queue, err
   214  	}
   215  
   216  	terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity))
   217  	if err != nil {
   218  		return framework.Queue, err
   219  	}
   220  
   221  	for _, term := range terms {
   222  		if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
   223  			logger.V(5).Info("a node with matched pod affinity topologyKey was added/updated and it may make pod schedulable",
   224  				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
   225  			return framework.Queue, err
   226  		}
   227  	}
   228  
   229  	antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity))
   230  	if err != nil {
   231  		return framework.Queue, err
   232  	}
   233  
   234  	for _, term := range antiTerms {
   235  		if _, ok := modifiedNode.Labels[term.TopologyKey]; ok {
   236  			logger.V(5).Info("a node with matched pod anti-affinity topologyKey was added/updated and it may make pod schedulable",
   237  				"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
   238  			return framework.Queue, err
   239  		}
   240  	}
   241  	logger.V(5).Info("a node is added/updated but doesn't have any topologyKey which matches pod affinity/anti-affinity",
   242  		"pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
   243  	return framework.QueueSkip, nil
   244  }