k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go

k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package podtopologyspread
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"sync/atomic"
    24  
    25  	v1 "k8s.io/api/core/v1"
    26  	"k8s.io/apimachinery/pkg/util/sets"
    27  	"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
    28  	"k8s.io/kubernetes/pkg/scheduler/framework"
    29  )
    30  
    31  const preScoreStateKey = "PreScore" + Name
    32  const invalidScore = -1
    33  
    34  // preScoreState computed at PreScore and used at Score.
    35  // Fields are exported for comparison during testing.
    36  type preScoreState struct {
    37  	Constraints []topologySpreadConstraint
    38  	// IgnoredNodes is a set of node names which miss some Constraints[*].topologyKey.
    39  	IgnoredNodes sets.Set[string]
    40  	// TopologyPairToPodCounts is keyed with topologyPair, and valued with the number of matching pods.
    41  	TopologyPairToPodCounts map[topologyPair]*int64
    42  	// TopologyNormalizingWeight is the weight we give to the counts per topology.
    43  	// This allows the pod counts of smaller topologies to not be watered down by
    44  	// bigger ones.
    45  	TopologyNormalizingWeight []float64
    46  }
    47  
    48  // Clone implements the mandatory Clone interface. We don't really copy the data since
    49  // there is no need for that.
    50  func (s *preScoreState) Clone() framework.StateData {
    51  	return s
    52  }
    53  
    54  // initPreScoreState iterates "filteredNodes" to filter out the nodes which
    55  // don't have required topologyKey(s), and initialize:
    56  // 1) s.TopologyPairToPodCounts: keyed with both eligible topology pair and node names.
    57  // 2) s.IgnoredNodes: the set of nodes that shouldn't be scored.
    58  // 3) s.TopologyNormalizingWeight: The weight to be given to each constraint based on the number of values in a topology.
    59  func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*framework.NodeInfo, requireAllTopologies bool) error {
    60  	var err error
    61  	if len(pod.Spec.TopologySpreadConstraints) > 0 {
    62  		s.Constraints, err = pl.filterTopologySpreadConstraints(
    63  			pod.Spec.TopologySpreadConstraints,
    64  			pod.Labels,
    65  			v1.ScheduleAnyway,
    66  		)
    67  		if err != nil {
    68  			return fmt.Errorf("obtaining pod's soft topology spread constraints: %w", err)
    69  		}
    70  	} else {
    71  		s.Constraints, err = pl.buildDefaultConstraints(pod, v1.ScheduleAnyway)
    72  		if err != nil {
    73  			return fmt.Errorf("setting default soft topology spread constraints: %w", err)
    74  		}
    75  	}
    76  	if len(s.Constraints) == 0 {
    77  		return nil
    78  	}
    79  	topoSize := make([]int, len(s.Constraints))
    80  	for _, node := range filteredNodes {
    81  		if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Node().Labels, s.Constraints) {
    82  			// Nodes which don't have all required topologyKeys present are ignored
    83  			// when scoring later.
    84  			s.IgnoredNodes.Insert(node.Node().Name)
    85  			continue
    86  		}
    87  		for i, constraint := range s.Constraints {
    88  			// per-node counts are calculated during Score.
    89  			if constraint.TopologyKey == v1.LabelHostname {
    90  				continue
    91  			}
    92  			pair := topologyPair{key: constraint.TopologyKey, value: node.Node().Labels[constraint.TopologyKey]}
    93  			if s.TopologyPairToPodCounts[pair] == nil {
    94  				s.TopologyPairToPodCounts[pair] = new(int64)
    95  				topoSize[i]++
    96  			}
    97  		}
    98  	}
    99  
   100  	s.TopologyNormalizingWeight = make([]float64, len(s.Constraints))
   101  	for i, c := range s.Constraints {
   102  		sz := topoSize[i]
   103  		if c.TopologyKey == v1.LabelHostname {
   104  			sz = len(filteredNodes) - len(s.IgnoredNodes)
   105  		}
   106  		s.TopologyNormalizingWeight[i] = topologyNormalizingWeight(sz)
   107  	}
   108  	return nil
   109  }
   110  
   111  // PreScore builds and writes cycle state used by Score and NormalizeScore.
   112  func (pl *PodTopologySpread) PreScore(
   113  	ctx context.Context,
   114  	cycleState *framework.CycleState,
   115  	pod *v1.Pod,
   116  	filteredNodes []*framework.NodeInfo,
   117  ) *framework.Status {
   118  	allNodes, err := pl.sharedLister.NodeInfos().List()
   119  	if err != nil {
   120  		return framework.AsStatus(fmt.Errorf("getting all nodes: %w", err))
   121  	}
   122  
   123  	if len(allNodes) == 0 {
   124  		// No need to score.
   125  		return framework.NewStatus(framework.Skip)
   126  	}
   127  
   128  	state := &preScoreState{
   129  		IgnoredNodes:            sets.New[string](),
   130  		TopologyPairToPodCounts: make(map[topologyPair]*int64),
   131  	}
   132  	// Only require that nodes have all the topology labels if using
   133  	// non-system-default spreading rules. This allows nodes that don't have a
   134  	// zone label to still have hostname spreading.
   135  	requireAllTopologies := len(pod.Spec.TopologySpreadConstraints) > 0 || !pl.systemDefaulted
   136  	err = pl.initPreScoreState(state, pod, filteredNodes, requireAllTopologies)
   137  	if err != nil {
   138  		return framework.AsStatus(fmt.Errorf("calculating preScoreState: %w", err))
   139  	}
   140  
   141  	// return Skip if incoming pod doesn't have soft topology spread Constraints.
   142  	if len(state.Constraints) == 0 {
   143  		return framework.NewStatus(framework.Skip)
   144  	}
   145  
   146  	// Ignore parsing errors for backwards compatibility.
   147  	requiredNodeAffinity := nodeaffinity.GetRequiredNodeAffinity(pod)
   148  	processAllNode := func(i int) {
   149  		nodeInfo := allNodes[i]
   150  		node := nodeInfo.Node()
   151  
   152  		if !pl.enableNodeInclusionPolicyInPodTopologySpread {
   153  			// `node` should satisfy incoming pod's NodeSelector/NodeAffinity
   154  			if match, _ := requiredNodeAffinity.Match(node); !match {
   155  				return
   156  			}
   157  		}
   158  
   159  		// All topologyKeys need to be present in `node`
   160  		if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints) {
   161  			return
   162  		}
   163  
   164  		for _, c := range state.Constraints {
   165  			if pl.enableNodeInclusionPolicyInPodTopologySpread &&
   166  				!c.matchNodeInclusionPolicies(pod, node, requiredNodeAffinity) {
   167  				continue
   168  			}
   169  
   170  			pair := topologyPair{key: c.TopologyKey, value: node.Labels[c.TopologyKey]}
   171  			// If current topology pair is not associated with any candidate node,
   172  			// continue to avoid unnecessary calculation.
   173  			// Per-node counts are also skipped, as they are done during Score.
   174  			tpCount := state.TopologyPairToPodCounts[pair]
   175  			if tpCount == nil {
   176  				continue
   177  			}
   178  			count := countPodsMatchSelector(nodeInfo.Pods, c.Selector, pod.Namespace)
   179  			atomic.AddInt64(tpCount, int64(count))
   180  		}
   181  	}
   182  	pl.parallelizer.Until(ctx, len(allNodes), processAllNode, pl.Name())
   183  
   184  	cycleState.Write(preScoreStateKey, state)
   185  	return nil
   186  }
   187  
   188  // Score invoked at the Score extension point.
   189  // The "score" returned in this function is the matching number of pods on the `nodeName`,
   190  // it is normalized later.
   191  func (pl *PodTopologySpread) Score(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
   192  	nodeInfo, err := pl.sharedLister.NodeInfos().Get(nodeName)
   193  	if err != nil {
   194  		return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
   195  	}
   196  
   197  	node := nodeInfo.Node()
   198  	s, err := getPreScoreState(cycleState)
   199  	if err != nil {
   200  		return 0, framework.AsStatus(err)
   201  	}
   202  
   203  	// Return if the node is not qualified.
   204  	if s.IgnoredNodes.Has(node.Name) {
   205  		return 0, nil
   206  	}
   207  
   208  	// For each present <pair>, current node gets a credit of <matchSum>.
   209  	// And we sum up <matchSum> and return it as this node's score.
   210  	var score float64
   211  	for i, c := range s.Constraints {
   212  		if tpVal, ok := node.Labels[c.TopologyKey]; ok {
   213  			var cnt int64
   214  			if c.TopologyKey == v1.LabelHostname {
   215  				cnt = int64(countPodsMatchSelector(nodeInfo.Pods, c.Selector, pod.Namespace))
   216  			} else {
   217  				pair := topologyPair{key: c.TopologyKey, value: tpVal}
   218  				cnt = *s.TopologyPairToPodCounts[pair]
   219  			}
   220  			score += scoreForCount(cnt, c.MaxSkew, s.TopologyNormalizingWeight[i])
   221  		}
   222  	}
   223  	return int64(math.Round(score)), nil
   224  }
   225  
   226  // NormalizeScore invoked after scoring all nodes.
   227  func (pl *PodTopologySpread) NormalizeScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, scores framework.NodeScoreList) *framework.Status {
   228  	s, err := getPreScoreState(cycleState)
   229  	if err != nil {
   230  		return framework.AsStatus(err)
   231  	}
   232  	if s == nil {
   233  		return nil
   234  	}
   235  
   236  	// Calculate <minScore> and <maxScore>
   237  	var minScore int64 = math.MaxInt64
   238  	var maxScore int64
   239  	for i, score := range scores {
   240  		// it's mandatory to check if <score.Name> is present in m.IgnoredNodes
   241  		if s.IgnoredNodes.Has(score.Name) {
   242  			scores[i].Score = invalidScore
   243  			continue
   244  		}
   245  		if score.Score < minScore {
   246  			minScore = score.Score
   247  		}
   248  		if score.Score > maxScore {
   249  			maxScore = score.Score
   250  		}
   251  	}
   252  
   253  	for i := range scores {
   254  		if scores[i].Score == invalidScore {
   255  			scores[i].Score = 0
   256  			continue
   257  		}
   258  		if maxScore == 0 {
   259  			scores[i].Score = framework.MaxNodeScore
   260  			continue
   261  		}
   262  		s := scores[i].Score
   263  		scores[i].Score = framework.MaxNodeScore * (maxScore + minScore - s) / maxScore
   264  	}
   265  	return nil
   266  }
   267  
   268  // ScoreExtensions of the Score plugin.
   269  func (pl *PodTopologySpread) ScoreExtensions() framework.ScoreExtensions {
   270  	return pl
   271  }
   272  
   273  func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
   274  	c, err := cycleState.Read(preScoreStateKey)
   275  	if err != nil {
   276  		return nil, fmt.Errorf("error reading %q from cycleState: %w", preScoreStateKey, err)
   277  	}
   278  
   279  	s, ok := c.(*preScoreState)
   280  	if !ok {
   281  		return nil, fmt.Errorf("%+v  convert to podtopologyspread.preScoreState error", c)
   282  	}
   283  	return s, nil
   284  }
   285  
   286  // topologyNormalizingWeight calculates the weight for the topology, based on
   287  // the number of values that exist for a topology.
   288  // Since <size> is at least 1 (all nodes that passed the Filters are in the
   289  // same topology), and k8s supports 5k nodes, the result is in the interval
   290  // <1.09, 8.52>.
   291  //
   292  // Note: <size> could also be zero when no nodes have the required topologies,
   293  // however we don't care about topology weight in this case as we return a 0
   294  // score for all nodes.
   295  func topologyNormalizingWeight(size int) float64 {
   296  	return math.Log(float64(size + 2))
   297  }
   298  
   299  // scoreForCount calculates the score based on number of matching pods in a
   300  // topology domain, the constraint's maxSkew and the topology weight.
   301  // `maxSkew-1` is added to the score so that differences between topology
   302  // domains get watered down, controlling the tolerance of the score to skews.
   303  func scoreForCount(cnt int64, maxSkew int32, tpWeight float64) float64 {
   304  	return float64(cnt)*tpWeight + float64(maxSkew-1)
   305  }