k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/interpodaffinity/plugin.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package interpodaffinity 18 19 import ( 20 "context" 21 "fmt" 22 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/apimachinery/pkg/labels" 25 "k8s.io/apimachinery/pkg/runtime" 26 listersv1 "k8s.io/client-go/listers/core/v1" 27 "k8s.io/klog/v2" 28 "k8s.io/kubernetes/pkg/scheduler/apis/config" 29 "k8s.io/kubernetes/pkg/scheduler/apis/config/validation" 30 "k8s.io/kubernetes/pkg/scheduler/framework" 31 "k8s.io/kubernetes/pkg/scheduler/framework/parallelize" 32 "k8s.io/kubernetes/pkg/scheduler/framework/plugins/names" 33 "k8s.io/kubernetes/pkg/scheduler/util" 34 ) 35 36 // Name is the name of the plugin used in the plugin registry and configurations. 37 const Name = names.InterPodAffinity 38 39 var _ framework.PreFilterPlugin = &InterPodAffinity{} 40 var _ framework.FilterPlugin = &InterPodAffinity{} 41 var _ framework.PreScorePlugin = &InterPodAffinity{} 42 var _ framework.ScorePlugin = &InterPodAffinity{} 43 var _ framework.EnqueueExtensions = &InterPodAffinity{} 44 45 // InterPodAffinity is a plugin that checks inter pod affinity 46 type InterPodAffinity struct { 47 parallelizer parallelize.Parallelizer 48 args config.InterPodAffinityArgs 49 sharedLister framework.SharedLister 50 nsLister listersv1.NamespaceLister 51 } 52 53 // Name returns name of the plugin. It is used in logs, etc. 54 func (pl *InterPodAffinity) Name() string { 55 return Name 56 } 57 58 // EventsToRegister returns the possible events that may make a failed Pod 59 // schedulable 60 func (pl *InterPodAffinity) EventsToRegister() []framework.ClusterEventWithHint { 61 return []framework.ClusterEventWithHint{ 62 // All ActionType includes the following events: 63 // - Delete. An unschedulable Pod may fail due to violating an existing Pod's anti-affinity constraints, 64 // deleting an existing Pod may make it schedulable. 65 // - Update. Updating on an existing Pod's labels (e.g., removal) may make 66 // an unschedulable Pod schedulable. 67 // - Add. An unschedulable Pod may fail due to violating pod-affinity constraints, 68 // adding an assigned Pod may make it schedulable. 69 // 70 // A note about UpdateNodeTaint event: 71 // NodeAdd QueueingHint isn't always called because of the internal feature called preCheck. 72 // As a common problematic scenario, 73 // when a node is added but not ready, NodeAdd event is filtered out by preCheck and doesn't arrive. 74 // In such cases, this plugin may miss some events that actually make pods schedulable. 75 // As a workaround, we add UpdateNodeTaint event to catch the case. 76 // We can remove UpdateNodeTaint when we remove the preCheck feature. 77 // See: https://github.com/kubernetes/kubernetes/issues/110175 78 {Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.All}, QueueingHintFn: pl.isSchedulableAfterPodChange}, 79 {Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeLabel | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange}, 80 } 81 } 82 83 // New initializes a new plugin and returns it. 84 func New(_ context.Context, plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) { 85 if h.SnapshotSharedLister() == nil { 86 return nil, fmt.Errorf("SnapshotSharedlister is nil") 87 } 88 args, err := getArgs(plArgs) 89 if err != nil { 90 return nil, err 91 } 92 if err := validation.ValidateInterPodAffinityArgs(nil, &args); err != nil { 93 return nil, err 94 } 95 pl := &InterPodAffinity{ 96 parallelizer: h.Parallelizer(), 97 args: args, 98 sharedLister: h.SnapshotSharedLister(), 99 nsLister: h.SharedInformerFactory().Core().V1().Namespaces().Lister(), 100 } 101 102 return pl, nil 103 } 104 105 func getArgs(obj runtime.Object) (config.InterPodAffinityArgs, error) { 106 ptr, ok := obj.(*config.InterPodAffinityArgs) 107 if !ok { 108 return config.InterPodAffinityArgs{}, fmt.Errorf("want args to be of type InterPodAffinityArgs, got %T", obj) 109 } 110 return *ptr, nil 111 } 112 113 // Updates Namespaces with the set of namespaces identified by NamespaceSelector. 114 // If successful, NamespaceSelector is set to nil. 115 // The assumption is that the term is for an incoming pod, in which case 116 // namespaceSelector is either unrolled into Namespaces (and so the selector 117 // is set to Nothing()) or is Empty(), which means match everything. Therefore, 118 // there when matching against this term, there is no need to lookup the existing 119 // pod's namespace labels to match them against term's namespaceSelector explicitly. 120 func (pl *InterPodAffinity) mergeAffinityTermNamespacesIfNotEmpty(at *framework.AffinityTerm) error { 121 if at.NamespaceSelector.Empty() { 122 return nil 123 } 124 ns, err := pl.nsLister.List(at.NamespaceSelector) 125 if err != nil { 126 return err 127 } 128 for _, n := range ns { 129 at.Namespaces.Insert(n.Name) 130 } 131 at.NamespaceSelector = labels.Nothing() 132 return nil 133 } 134 135 // GetNamespaceLabelsSnapshot returns a snapshot of the labels associated with 136 // the namespace. 137 func GetNamespaceLabelsSnapshot(logger klog.Logger, ns string, nsLister listersv1.NamespaceLister) (nsLabels labels.Set) { 138 podNS, err := nsLister.Get(ns) 139 if err == nil { 140 // Create and return snapshot of the labels. 141 return labels.Merge(podNS.Labels, nil) 142 } 143 logger.V(3).Info("getting namespace, assuming empty set of namespace labels", "namespace", ns, "err", err) 144 return 145 } 146 147 func (pl *InterPodAffinity) isSchedulableAfterPodChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { 148 originalPod, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj) 149 if err != nil { 150 return framework.Queue, err 151 } 152 if (modifiedPod != nil && modifiedPod.Spec.NodeName == "") || (originalPod != nil && originalPod.Spec.NodeName == "") { 153 logger.V(5).Info("the added/updated/deleted pod is unscheduled, so it doesn't make the target pod schedulable", 154 "pod", klog.KObj(pod), "originalPod", klog.KObj(originalPod), "modifiedPod", klog.KObj(modifiedPod)) 155 return framework.QueueSkip, nil 156 } 157 158 terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity)) 159 if err != nil { 160 return framework.Queue, err 161 } 162 163 antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity)) 164 if err != nil { 165 return framework.Queue, err 166 } 167 168 // Pod is updated. Return Queue when the updated pod matching the target pod's affinity or not matching anti-affinity. 169 // Note that, we don't need to check each affinity individually when the Pod has more than one affinity 170 // because the current PodAffinity looks for a **single** existing pod that can satisfy **all** the terms of inter-pod affinity of an incoming pod. 171 if modifiedPod != nil && originalPod != nil { 172 if !podMatchesAllAffinityTerms(terms, originalPod) && podMatchesAllAffinityTerms(terms, modifiedPod) { 173 logger.V(5).Info("a scheduled pod was updated to match the target pod's affinity, and the pod may be schedulable now", 174 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 175 return framework.Queue, nil 176 } 177 if podMatchesAllAffinityTerms(antiTerms, originalPod) && !podMatchesAllAffinityTerms(antiTerms, modifiedPod) { 178 logger.V(5).Info("a scheduled pod was updated not to match the target pod's anti affinity, and the pod may be schedulable now", 179 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 180 return framework.Queue, nil 181 } 182 logger.V(5).Info("a scheduled pod was updated but it doesn't match the target pod's affinity or does match the target pod's anti-affinity", 183 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 184 return framework.QueueSkip, nil 185 } 186 187 // Pod is added. Return Queue when the added pod matching the target pod's affinity. 188 if modifiedPod != nil { 189 if podMatchesAllAffinityTerms(terms, modifiedPod) { 190 logger.V(5).Info("a scheduled pod was added and it matches the target pod's affinity", 191 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 192 return framework.Queue, nil 193 } 194 logger.V(5).Info("a scheduled pod was added and it doesn't match the target pod's affinity", 195 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 196 return framework.QueueSkip, nil 197 } 198 199 // Pod is deleted. Return Queue when the deleted pod matching the target pod's anti-affinity. 200 if !podMatchesAllAffinityTerms(antiTerms, originalPod) { 201 logger.V(5).Info("a scheduled pod was deleted but it doesn't match the target pod's anti-affinity", 202 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 203 return framework.QueueSkip, nil 204 } 205 logger.V(5).Info("a scheduled pod was deleted and it matches the target pod's anti-affinity. The pod may be schedulable now", 206 "pod", klog.KObj(pod), "modifiedPod", klog.KObj(modifiedPod)) 207 return framework.Queue, nil 208 } 209 210 func (pl *InterPodAffinity) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) { 211 _, modifiedNode, err := util.As[*v1.Node](oldObj, newObj) 212 if err != nil { 213 return framework.Queue, err 214 } 215 216 terms, err := framework.GetAffinityTerms(pod, framework.GetPodAffinityTerms(pod.Spec.Affinity)) 217 if err != nil { 218 return framework.Queue, err 219 } 220 221 for _, term := range terms { 222 if _, ok := modifiedNode.Labels[term.TopologyKey]; ok { 223 logger.V(5).Info("a node with matched pod affinity topologyKey was added/updated and it may make pod schedulable", 224 "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode)) 225 return framework.Queue, err 226 } 227 } 228 229 antiTerms, err := framework.GetAffinityTerms(pod, framework.GetPodAntiAffinityTerms(pod.Spec.Affinity)) 230 if err != nil { 231 return framework.Queue, err 232 } 233 234 for _, term := range antiTerms { 235 if _, ok := modifiedNode.Labels[term.TopologyKey]; ok { 236 logger.V(5).Info("a node with matched pod anti-affinity topologyKey was added/updated and it may make pod schedulable", 237 "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode)) 238 return framework.Queue, err 239 } 240 } 241 logger.V(5).Info("a node is added/updated but doesn't have any topologyKey which matches pod affinity/anti-affinity", 242 "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode)) 243 return framework.QueueSkip, nil 244 }