github.com/kubewharf/katalyst-core@v0.5.3/pkg/scheduler/plugins/noderesourcetopology/plugin.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package noderesourcetopology 18 19 import ( 20 "fmt" 21 "strconv" 22 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/apimachinery/pkg/api/resource" 25 "k8s.io/apimachinery/pkg/runtime" 26 "k8s.io/apimachinery/pkg/util/sets" 27 quotav1 "k8s.io/apiserver/pkg/quota/v1" 28 "k8s.io/klog/v2" 29 "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" 30 "k8s.io/kubernetes/pkg/scheduler/apis/config" 31 "k8s.io/kubernetes/pkg/scheduler/framework" 32 33 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" 34 apisconfig "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config" 35 "github.com/kubewharf/katalyst-api/pkg/apis/scheduling/config/validation" 36 "github.com/kubewharf/katalyst-api/pkg/consts" 37 "github.com/kubewharf/katalyst-core/pkg/scheduler/eventhandlers" 38 "github.com/kubewharf/katalyst-core/pkg/scheduler/util" 39 "github.com/kubewharf/katalyst-core/pkg/util/native" 40 ) 41 42 const ( 43 TopologyMatchName = "NodeResourceTopology" 44 ) 45 46 var nativeAlignedResources = sets.NewString() 47 48 type ( 49 filterFn func(*v1.Pod, []*v1alpha1.TopologyZone, *framework.NodeInfo) *framework.Status 50 scoringFn func(*v1.Pod, []*v1alpha1.TopologyZone) (int64, *framework.Status) 51 ) 52 53 type NUMANode struct { 54 SocketID string 55 NUMAID int 56 Capacity v1.ResourceList 57 Allocatable v1.ResourceList 58 Available v1.ResourceList 59 Costs map[int]int 60 } 61 62 type NUMANodeList []NUMANode 63 64 type TopologyMatch struct { 65 scoreStrategyFunc scoreStrategyFn 66 scoreStrategyType config.ScoringStrategyType 67 resourceToWeightMap resourceToWeightMap 68 alignedResources sets.String 69 resourcePolicy consts.ResourcePluginPolicyName 70 sharedLister framework.SharedLister 71 } 72 73 var ( 74 _ framework.FilterPlugin = &TopologyMatch{} 75 _ framework.ScorePlugin = &TopologyMatch{} 76 _ framework.ReservePlugin = &TopologyMatch{} 77 _ framework.EnqueueExtensions = &TopologyMatch{} 78 ) 79 80 // Name returns name of the plugin. 81 func (tm *TopologyMatch) Name() string { 82 return TopologyMatchName 83 } 84 85 func New(args runtime.Object, h framework.Handle) (framework.Plugin, error) { 86 klog.Info("Creating new TopologyMatch plugin") 87 klog.Infof("args: %+v", args) 88 tcfg, ok := args.(*apisconfig.NodeResourceTopologyArgs) 89 if !ok { 90 return nil, fmt.Errorf("want args to be of type NodeResourceTopologyArgs, got %T", args) 91 } 92 93 if err := validation.ValidateNodeResourceTopologyMatchArgs(nil, tcfg); err != nil { 94 return nil, err 95 } 96 97 resourceToWeightMap := make(resourceToWeightMap) 98 for _, resource := range tcfg.ScoringStrategy.Resources { 99 resourceToWeightMap[v1.ResourceName(resource.Name)] = resource.Weight 100 } 101 102 alignedResources := sets.NewString(tcfg.AlignedResources...) 103 104 strategy, err := getScoringStrategyFunction(tcfg.ScoringStrategy.Type) 105 if err != nil { 106 return nil, err 107 } 108 109 eventhandlers.RegisterCommonPodHandler() 110 eventhandlers.RegisterCommonCNRHandler() 111 112 return &TopologyMatch{ 113 scoreStrategyType: tcfg.ScoringStrategy.Type, 114 alignedResources: alignedResources, 115 resourceToWeightMap: resourceToWeightMap, 116 scoreStrategyFunc: strategy, 117 resourcePolicy: tcfg.ResourcePluginPolicy, 118 sharedLister: h.SnapshotSharedLister(), 119 }, nil 120 } 121 122 // EventsToRegister returns the possible events that may make a Pod 123 // failed by this plugin schedulable. 124 // NOTE: if in-place-update (KEP 1287) gets implemented, then PodUpdate event 125 // should be registered for this plugin since a Pod update may free up resources 126 // that make other Pods schedulable. 127 func (tm *TopologyMatch) EventsToRegister() []framework.ClusterEvent { 128 // To register a custom event, follow the naming convention at: 129 // https://git.k8s.io/kubernetes/pkg/scheduler/eventhandlers.go#L403-L410 130 cnrGVK := fmt.Sprintf("customnoderesources.v1alpha1.%v", v1alpha1.GroupName) 131 return []framework.ClusterEvent{ 132 {Resource: framework.Pod, ActionType: framework.Delete}, 133 {Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeAllocatable}, 134 {Resource: framework.GVK(cnrGVK), ActionType: framework.Add | framework.Update}, 135 } 136 } 137 138 func (tm *TopologyMatch) topologyMatchSupport(pod *v1.Pod) bool { 139 if tm.resourcePolicy == consts.ResourcePluginPolicyNameNative { 140 // native policy, only Guaranteed pod with full CPU supported 141 if qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && util.IsRequestFullCPU(pod) { 142 return true 143 } 144 return false 145 } 146 147 if tm.resourcePolicy == consts.ResourcePluginPolicyNameDynamic { 148 // dynamic policy, only dedicated_cores with numaBinding supported 149 if util.IsDedicatedPod(pod) && util.IsNumaBinding(pod) { 150 return true 151 } 152 } 153 154 return false 155 } 156 157 func (tm *TopologyMatch) dedicatedPodsFilter(nodeInfo *framework.NodeInfo) func(consumer string) bool { 158 dedicatedPods := make(map[string]struct{}) 159 for _, podInfo := range nodeInfo.Pods { 160 if util.IsDedicatedPod(podInfo.Pod) { 161 key := native.GenerateNamespaceNameKey(podInfo.Pod.Namespace, podInfo.Pod.Name) 162 dedicatedPods[key] = struct{}{} 163 } 164 } 165 166 return func(consumer string) bool { 167 namespace, name, _, err := native.ParseNamespaceNameUIDKey(consumer) 168 if err != nil { 169 klog.Errorf("ParseNamespaceNameUIDKey consumer %v fail: %v", consumer, err) 170 return false 171 } 172 173 // read only after map inited 174 key := native.GenerateNamespaceNameKey(namespace, name) 175 if _, ok := dedicatedPods[key]; ok { 176 return true 177 } 178 179 return false 180 } 181 } 182 183 func getScoringStrategyFunction(strategy config.ScoringStrategyType) (scoreStrategyFn, error) { 184 switch strategy { 185 case config.MostAllocated: 186 return mostAllocatedScoreStrategy, nil 187 case config.LeastAllocated: 188 return leastAllocatedScoreStrategy, nil 189 case consts.BalancedAllocation: 190 return balancedAllocationScoreStrategy, nil 191 case consts.LeastNUMANodes: 192 return nil, fmt.Errorf("LeastNUMANodes not support yet") 193 default: 194 return nil, fmt.Errorf("illegal scoring strategy found") 195 } 196 } 197 198 func TopologyZonesToNUMANodeList(zones []*v1alpha1.TopologyZone) NUMANodeList { 199 nodes := NUMANodeList{} 200 201 for _, topologyZone := range zones { 202 if topologyZone.Type != v1alpha1.TopologyTypeSocket { 203 continue 204 } 205 for _, child := range topologyZone.Children { 206 if child.Type != v1alpha1.TopologyTypeNuma { 207 continue 208 } 209 numaID, err := getID(child.Name) 210 if err != nil { 211 klog.Error(err) 212 continue 213 } 214 capacity, allocatable, available := extractAvailableResources(child) 215 nodes = append(nodes, NUMANode{ 216 SocketID: topologyZone.Name, 217 NUMAID: numaID, 218 Capacity: capacity, 219 Allocatable: allocatable, 220 Available: available, 221 }) 222 } 223 } 224 225 return nodes 226 } 227 228 func TopologyZonesToNUMANodeMap(zones []*v1alpha1.TopologyZone) map[int]NUMANode { 229 numaNodeMap := make(map[int]NUMANode) 230 231 for _, topologyZone := range zones { 232 if topologyZone.Type != v1alpha1.TopologyTypeSocket { 233 continue 234 } 235 for _, child := range topologyZone.Children { 236 if child.Type != v1alpha1.TopologyTypeNuma { 237 continue 238 } 239 numaID, err := getID(child.Name) 240 if err != nil { 241 klog.Error(err) 242 continue 243 } 244 capacity, allocatable, available := extractAvailableResources(child) 245 numaNodeMap[numaID] = NUMANode{ 246 SocketID: topologyZone.Name, 247 NUMAID: numaID, 248 Capacity: capacity, 249 Allocatable: allocatable, 250 Available: available, 251 } 252 } 253 } 254 255 return numaNodeMap 256 } 257 258 func getID(name string) (int, error) { 259 numaID, err := strconv.Atoi(name) 260 if err != nil { 261 return -1, fmt.Errorf("invalid zone format zone: %s : %v", name, err) 262 } 263 264 if numaID > maxNUMAId-1 || numaID < 0 { 265 return -1, fmt.Errorf("invalid NUMA id range numaID: %d", numaID) 266 } 267 268 return numaID, nil 269 } 270 271 func extractAvailableResources(zone *v1alpha1.TopologyZone) (capacity, allocatable, available v1.ResourceList) { 272 used := make(v1.ResourceList) 273 for _, alloc := range zone.Allocations { 274 for resName, quantity := range *alloc.Requests { 275 if _, ok := used[resName]; !ok { 276 used[resName] = quantity.DeepCopy() 277 } else { 278 value := used[resName] 279 value.Add(quantity) 280 used[resName] = value 281 } 282 } 283 } 284 return zone.Resources.Capacity.DeepCopy(), zone.Resources.Allocatable.DeepCopy(), quotav1.SubtractWithNonNegativeResult(*zone.Resources.Allocatable, used) 285 } 286 287 func minNumaNodeCount(resourceName v1.ResourceName, quantity resource.Quantity, numaNodeMap map[int]NUMANode) int { 288 var ( 289 i = 0 290 sumResource resource.Quantity 291 ) 292 293 // allocatable in each numa may not equal because of resource reserve 294 for _, numaNode := range numaNodeMap { 295 i++ 296 if i == 1 { 297 sumResource = numaNode.Capacity[resourceName] 298 } else { 299 sumResource.Add(numaNode.Capacity[resourceName]) 300 } 301 if sumResource.Cmp(quantity) >= 0 { 302 return i 303 } 304 } 305 return i 306 }