github.com/kubewharf/katalyst-core@v0.5.3/pkg/scheduler/plugins/noderesourcetopology/plugin.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package noderesourcetopology
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  
    23  	v1 "k8s.io/api/core/v1"
    24  	"k8s.io/apimachinery/pkg/api/resource"
    25  	"k8s.io/apimachinery/pkg/runtime"
    26  	"k8s.io/apimachinery/pkg/util/sets"
    27  	quotav1 "k8s.io/apiserver/pkg/quota/v1"
    28  	"k8s.io/klog/v2"
    29  	"k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
    30  	"k8s.io/kubernetes/pkg/scheduler/apis/config"
    31  	"k8s.io/kubernetes/pkg/scheduler/framework"
    32  
    33  	"github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    34  	apisconfig "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config"
    35  	"github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config/validation"
    36  	"github.com/kubewharf/katalyst-api/pkg/consts"
    37  	"github.com/kubewharf/katalyst-core/pkg/scheduler/eventhandlers"
    38  	"github.com/kubewharf/katalyst-core/pkg/scheduler/util"
    39  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    40  )
    41  
const (
	// TopologyMatchName is the plugin name; it is the value returned by
	// (*TopologyMatch).Name.
	TopologyMatchName = "NodeResourceTopology"
)

// nativeAlignedResources is a package-level string set that is created empty.
// NOTE(review): nothing in this part of the file adds to or reads from the
// set — confirm where it is populated and consumed.
var nativeAlignedResources = sets.NewString()
    47  
// filterFn and scoringFn are the function signatures used for per-pod topology
// decisions.
//
// A filterFn receives the pod, a list of topology zones and the node info, and
// returns a framework status. A scoringFn receives the pod and a list of
// topology zones, and returns an int64 score together with a framework status.
// NOTE(review): the concrete implementations are not in this part of the file.
type (
	filterFn  func(*v1.Pod, []*v1alpha1.TopologyZone, *framework.NodeInfo) *framework.Status
	scoringFn func(*v1.Pod, []*v1alpha1.TopologyZone) (int64, *framework.Status)
)
    52  
// NUMANode is a flattened view of one NUMA-type topology zone.
//
// As populated by TopologyZonesToNUMANodeList and TopologyZonesToNUMANodeMap:
//   - SocketID is the name of the enclosing socket-type zone.
//   - NUMAID is the NUMA zone name parsed as an integer.
//   - Capacity and Allocatable are copies of the zone's resource lists.
//   - Available is Allocatable minus the summed allocation requests, never
//     negative.
//
// Costs is not set by the converters in this file.
// NOTE(review): presumably Costs maps a peer NUMA id to a distance — confirm
// against the code that fills it.
type NUMANode struct {
	SocketID    string
	NUMAID      int
	Capacity    v1.ResourceList
	Allocatable v1.ResourceList
	Available   v1.ResourceList
	Costs       map[int]int
}

// NUMANodeList is a slice of NUMANode values.
type NUMANodeList []NUMANode
    63  
// TopologyMatch is the scheduler plugin state; every field is assigned in New.
//
//   - scoreStrategyFunc is the scoring function selected for scoreStrategyType.
//   - scoreStrategyType is the scoring strategy type taken from the plugin args.
//   - resourceToWeightMap holds the per-resource weights from the scoring
//     strategy configuration.
//   - alignedResources is the set of resource names listed as aligned in the
//     plugin args.
//   - resourcePolicy is the resource plugin policy from the plugin args; it
//     selects which pods topologyMatchSupport accepts.
//   - sharedLister is the snapshot shared lister obtained from the framework
//     handle.
type TopologyMatch struct {
	scoreStrategyFunc   scoreStrategyFn
	scoreStrategyType   config.ScoringStrategyType
	resourceToWeightMap resourceToWeightMap
	alignedResources    sets.String
	resourcePolicy      consts.ResourcePluginPolicyName
	sharedLister        framework.SharedLister
}
    72  
// Compile-time checks that *TopologyMatch satisfies the framework's filter,
// score, reserve and enqueue-extension plugin interfaces.
var (
	_ framework.FilterPlugin      = &TopologyMatch{}
	_ framework.ScorePlugin       = &TopologyMatch{}
	_ framework.ReservePlugin     = &TopologyMatch{}
	_ framework.EnqueueExtensions = &TopologyMatch{}
)
    79  
// Name returns the name of the plugin, which is the TopologyMatchName constant.
func (tm *TopologyMatch) Name() string {
	return TopologyMatchName
}
    84  
    85  func New(args runtime.Object, h framework.Handle) (framework.Plugin, error) {
    86  	klog.Info("Creating new TopologyMatch plugin")
    87  	klog.Infof("args: %+v", args)
    88  	tcfg, ok := args.(*apisconfig.NodeResourceTopologyArgs)
    89  	if !ok {
    90  		return nil, fmt.Errorf("want args to be of type NodeResourceTopologyArgs, got %T", args)
    91  	}
    92  
    93  	if err := validation.ValidateNodeResourceTopologyMatchArgs(nil, tcfg); err != nil {
    94  		return nil, err
    95  	}
    96  
    97  	resourceToWeightMap := make(resourceToWeightMap)
    98  	for _, resource := range tcfg.ScoringStrategy.Resources {
    99  		resourceToWeightMap[v1.ResourceName(resource.Name)] = resource.Weight
   100  	}
   101  
   102  	alignedResources := sets.NewString(tcfg.AlignedResources...)
   103  
   104  	strategy, err := getScoringStrategyFunction(tcfg.ScoringStrategy.Type)
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  
   109  	eventhandlers.RegisterCommonPodHandler()
   110  	eventhandlers.RegisterCommonCNRHandler()
   111  
   112  	return &TopologyMatch{
   113  		scoreStrategyType:   tcfg.ScoringStrategy.Type,
   114  		alignedResources:    alignedResources,
   115  		resourceToWeightMap: resourceToWeightMap,
   116  		scoreStrategyFunc:   strategy,
   117  		resourcePolicy:      tcfg.ResourcePluginPolicy,
   118  		sharedLister:        h.SnapshotSharedLister(),
   119  	}, nil
   120  }
   121  
   122  // EventsToRegister returns the possible events that may make a Pod
   123  // failed by this plugin schedulable.
   124  // NOTE: if in-place-update (KEP 1287) gets implemented, then PodUpdate event
   125  // should be registered for this plugin since a Pod update may free up resources
   126  // that make other Pods schedulable.
   127  func (tm *TopologyMatch) EventsToRegister() []framework.ClusterEvent {
   128  	// To register a custom event, follow the naming convention at:
   129  	// https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410
   130  	cnrGVK := fmt.Sprintf("customnoderesources.v1alpha1.%v", v1alpha1.GroupName)
   131  	return []framework.ClusterEvent{
   132  		{Resource: framework.Pod, ActionType: framework.Delete},
   133  		{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeAllocatable},
   134  		{Resource: framework.GVK(cnrGVK), ActionType: framework.Add | framework.Update},
   135  	}
   136  }
   137  
   138  func (tm *TopologyMatch) topologyMatchSupport(pod *v1.Pod) bool {
   139  	if tm.resourcePolicy == consts.ResourcePluginPolicyNameNative {
   140  		// native policy, only Guaranteed pod with full CPU supported
   141  		if qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && util.IsRequestFullCPU(pod) {
   142  			return true
   143  		}
   144  		return false
   145  	}
   146  
   147  	if tm.resourcePolicy == consts.ResourcePluginPolicyNameDynamic {
   148  		// dynamic policy, only dedicated_cores with numaBinding supported
   149  		if util.IsDedicatedPod(pod) && util.IsNumaBinding(pod) {
   150  			return true
   151  		}
   152  	}
   153  
   154  	return false
   155  }
   156  
   157  func (tm *TopologyMatch) dedicatedPodsFilter(nodeInfo *framework.NodeInfo) func(consumer string) bool {
   158  	dedicatedPods := make(map[string]struct{})
   159  	for _, podInfo := range nodeInfo.Pods {
   160  		if util.IsDedicatedPod(podInfo.Pod) {
   161  			key := native.GenerateNamespaceNameKey(podInfo.Pod.Namespace, podInfo.Pod.Name)
   162  			dedicatedPods[key] = struct{}{}
   163  		}
   164  	}
   165  
   166  	return func(consumer string) bool {
   167  		namespace, name, _, err := native.ParseNamespaceNameUIDKey(consumer)
   168  		if err != nil {
   169  			klog.Errorf("ParseNamespaceNameUIDKey consumer %v fail: %v", consumer, err)
   170  			return false
   171  		}
   172  
   173  		// read only after map inited
   174  		key := native.GenerateNamespaceNameKey(namespace, name)
   175  		if _, ok := dedicatedPods[key]; ok {
   176  			return true
   177  		}
   178  
   179  		return false
   180  	}
   181  }
   182  
   183  func getScoringStrategyFunction(strategy config.ScoringStrategyType) (scoreStrategyFn, error) {
   184  	switch strategy {
   185  	case config.MostAllocated:
   186  		return mostAllocatedScoreStrategy, nil
   187  	case config.LeastAllocated:
   188  		return leastAllocatedScoreStrategy, nil
   189  	case consts.BalancedAllocation:
   190  		return balancedAllocationScoreStrategy, nil
   191  	case consts.LeastNUMANodes:
   192  		return nil, fmt.Errorf("LeastNUMANodes not support yet")
   193  	default:
   194  		return nil, fmt.Errorf("illegal scoring strategy found")
   195  	}
   196  }
   197  
   198  func TopologyZonesToNUMANodeList(zones []*v1alpha1.TopologyZone) NUMANodeList {
   199  	nodes := NUMANodeList{}
   200  
   201  	for _, topologyZone := range zones {
   202  		if topologyZone.Type != v1alpha1.TopologyTypeSocket {
   203  			continue
   204  		}
   205  		for _, child := range topologyZone.Children {
   206  			if child.Type != v1alpha1.TopologyTypeNuma {
   207  				continue
   208  			}
   209  			numaID, err := getID(child.Name)
   210  			if err != nil {
   211  				klog.Error(err)
   212  				continue
   213  			}
   214  			capacity, allocatable, available := extractAvailableResources(child)
   215  			nodes = append(nodes, NUMANode{
   216  				SocketID:    topologyZone.Name,
   217  				NUMAID:      numaID,
   218  				Capacity:    capacity,
   219  				Allocatable: allocatable,
   220  				Available:   available,
   221  			})
   222  		}
   223  	}
   224  
   225  	return nodes
   226  }
   227  
   228  func TopologyZonesToNUMANodeMap(zones []*v1alpha1.TopologyZone) map[int]NUMANode {
   229  	numaNodeMap := make(map[int]NUMANode)
   230  
   231  	for _, topologyZone := range zones {
   232  		if topologyZone.Type != v1alpha1.TopologyTypeSocket {
   233  			continue
   234  		}
   235  		for _, child := range topologyZone.Children {
   236  			if child.Type != v1alpha1.TopologyTypeNuma {
   237  				continue
   238  			}
   239  			numaID, err := getID(child.Name)
   240  			if err != nil {
   241  				klog.Error(err)
   242  				continue
   243  			}
   244  			capacity, allocatable, available := extractAvailableResources(child)
   245  			numaNodeMap[numaID] = NUMANode{
   246  				SocketID:    topologyZone.Name,
   247  				NUMAID:      numaID,
   248  				Capacity:    capacity,
   249  				Allocatable: allocatable,
   250  				Available:   available,
   251  			}
   252  		}
   253  	}
   254  
   255  	return numaNodeMap
   256  }
   257  
   258  func getID(name string) (int, error) {
   259  	numaID, err := strconv.Atoi(name)
   260  	if err != nil {
   261  		return -1, fmt.Errorf("invalid zone format zone: %s : %v", name, err)
   262  	}
   263  
   264  	if numaID > maxNUMAId-1 || numaID < 0 {
   265  		return -1, fmt.Errorf("invalid NUMA id range numaID: %d", numaID)
   266  	}
   267  
   268  	return numaID, nil
   269  }
   270  
   271  func extractAvailableResources(zone *v1alpha1.TopologyZone) (capacity, allocatable, available v1.ResourceList) {
   272  	used := make(v1.ResourceList)
   273  	for _, alloc := range zone.Allocations {
   274  		for resName, quantity := range *alloc.Requests {
   275  			if _, ok := used[resName]; !ok {
   276  				used[resName] = quantity.DeepCopy()
   277  			} else {
   278  				value := used[resName]
   279  				value.Add(quantity)
   280  				used[resName] = value
   281  			}
   282  		}
   283  	}
   284  	return zone.Resources.Capacity.DeepCopy(), zone.Resources.Allocatable.DeepCopy(), quotav1.SubtractWithNonNegativeResult(*zone.Resources.Allocatable, used)
   285  }
   286  
   287  func minNumaNodeCount(resourceName v1.ResourceName, quantity resource.Quantity, numaNodeMap map[int]NUMANode) int {
   288  	var (
   289  		i           = 0
   290  		sumResource resource.Quantity
   291  	)
   292  
   293  	// allocatable in each numa may not equal because of resource reserve
   294  	for _, numaNode := range numaNodeMap {
   295  		i++
   296  		if i == 1 {
   297  			sumResource = numaNode.Capacity[resourceName]
   298  		} else {
   299  			sumResource.Add(numaNode.Capacity[resourceName])
   300  		}
   301  		if sumResource.Cmp(quantity) >= 0 {
   302  			return i
   303  		}
   304  	}
   305  	return i
   306  }