github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package dynamicpolicy 18 19 import ( 20 "context" 21 "fmt" 22 "sort" 23 24 v1 "k8s.io/api/core/v1" 25 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 26 "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" 27 28 apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" 29 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" 30 cpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/util" 31 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" 32 "github.com/kubewharf/katalyst-core/pkg/util/general" 33 "github.com/kubewharf/katalyst-core/pkg/util/machine" 34 qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" 35 ) 36 37 func (p *DynamicPolicy) sharedCoresHintHandler(_ context.Context, 38 req *pluginapi.ResourceRequest, 39 ) (*pluginapi.ResourceHintsResponse, error) { 40 if req == nil { 41 return nil, fmt.Errorf("got nil request") 42 } 43 44 return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), 45 map[string]*pluginapi.ListOfTopologyHints{ 46 string(v1.ResourceCPU): nil, // indicates that there is no numa preference 47 }) 48 } 49 50 func (p *DynamicPolicy) reclaimedCoresHintHandler(ctx context.Context, 51 req *pluginapi.ResourceRequest, 52 ) (*pluginapi.ResourceHintsResponse, error) { 53 return p.sharedCoresHintHandler(ctx, req) 54 } 55 56 func (p *DynamicPolicy) dedicatedCoresHintHandler(ctx context.Context, 57 req *pluginapi.ResourceRequest, 58 ) (*pluginapi.ResourceHintsResponse, error) { 59 if req == nil { 60 return nil, fmt.Errorf("dedicatedCoresHintHandler got nil req") 61 } 62 63 switch req.Annotations[apiconsts.PodAnnotationMemoryEnhancementNumaBinding] { 64 case apiconsts.PodAnnotationMemoryEnhancementNumaBindingEnable: 65 return p.dedicatedCoresWithNUMABindingHintHandler(ctx, req) 66 default: 67 return p.dedicatedCoresWithoutNUMABindingHintHandler(ctx, req) 68 } 69 } 70 71 func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Context, 72 req *pluginapi.ResourceRequest, 73 ) (*pluginapi.ResourceHintsResponse, error) { 74 // currently, we set cpuset of sidecar to the cpuset of its main container, 75 // so there is no numa preference here. 76 if req.ContainerType == pluginapi.ContainerType_SIDECAR { 77 return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), 78 map[string]*pluginapi.ListOfTopologyHints{ 79 string(v1.ResourceCPU): nil, // indicates that there is no numa preference 80 }) 81 } 82 83 reqInt, _, err := util.GetQuantityFromResourceReq(req) 84 if err != nil { 85 return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) 86 } 87 88 machineState := p.state.GetMachineState() 89 var hints map[string]*pluginapi.ListOfTopologyHints 90 91 allocationInfo := p.state.GetAllocationInfo(req.PodUid, req.ContainerName) 92 if allocationInfo != nil { 93 hints = cpuutil.RegenerateHints(allocationInfo, reqInt) 94 95 // regenerateHints failed. need to clear container record and re-calculate. 96 if hints == nil { 97 podEntries := p.state.GetPodEntries() 98 delete(podEntries[req.PodUid], req.ContainerName) 99 if len(podEntries[req.PodUid]) == 0 { 100 delete(podEntries, req.PodUid) 101 } 102 103 var err error 104 machineState, err = generateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries) 105 if err != nil { 106 general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", 107 req.PodNamespace, req.PodName, req.ContainerName, err) 108 return nil, fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) 109 } 110 } 111 } 112 113 // if hints exists in extra state-file, prefer to use them 114 if hints == nil { 115 availableNUMAs := machineState.GetFilteredNUMASet(state.CheckNUMABinding) 116 117 var extraErr error 118 hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs) 119 if extraErr != nil { 120 general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v", 121 req.PodNamespace, req.PodName, req.ContainerName, extraErr) 122 } 123 } 124 125 // otherwise, calculate hint for container without allocated memory 126 if hints == nil { 127 var calculateErr error 128 // calculate hint for container without allocated cpus 129 hints, calculateErr = p.calculateHints(reqInt, machineState, req.Annotations) 130 if calculateErr != nil { 131 return nil, fmt.Errorf("calculateHints failed with error: %v", calculateErr) 132 } 133 } 134 135 return util.PackResourceHintsResponse(req, string(v1.ResourceCPU), hints) 136 } 137 138 func (p *DynamicPolicy) dedicatedCoresWithoutNUMABindingHintHandler(_ context.Context, 139 _ *pluginapi.ResourceRequest, 140 ) (*pluginapi.ResourceHintsResponse, error) { 141 // todo: support dedicated_cores without NUMA binding 142 return nil, fmt.Errorf("not support dedicated_cores without NUMA binding") 143 } 144 145 // calculateHints is a helper function to calculate the topology hints 146 // with the given container requests. 147 func (p *DynamicPolicy) calculateHints(reqInt int, machineState state.NUMANodeMap, 148 reqAnnotations map[string]string, 149 ) (map[string]*pluginapi.ListOfTopologyHints, error) { 150 numaNodes := make([]int, 0, len(machineState)) 151 for numaNode := range machineState { 152 numaNodes = append(numaNodes, numaNode) 153 } 154 sort.Ints(numaNodes) 155 156 hints := map[string]*pluginapi.ListOfTopologyHints{ 157 string(v1.ResourceCPU): { 158 Hints: []*pluginapi.TopologyHint{}, 159 }, 160 } 161 162 minNUMAsCountNeeded, _, err := util.GetNUMANodesCountToFitCPUReq(reqInt, p.machineInfo.CPUTopology) 163 if err != nil { 164 return nil, fmt.Errorf("GetNUMANodesCountToFitCPUReq failed with error: %v", err) 165 } 166 167 // because it's hard to control memory allocation accurately, 168 // we only support numa_binding but not exclusive container with request smaller than 1 NUMA 169 if qosutil.AnnotationsIndicateNUMABinding(reqAnnotations) && 170 !qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) && 171 minNUMAsCountNeeded > 1 { 172 return nil, fmt.Errorf("NUMA not exclusive binding container has request larger than 1 NUMA") 173 } 174 175 numaPerSocket, err := p.machineInfo.NUMAsPerSocket() 176 if err != nil { 177 return nil, fmt.Errorf("NUMAsPerSocket failed with error: %v", err) 178 } 179 180 bitmask.IterateBitMasks(numaNodes, func(mask bitmask.BitMask) { 181 maskCount := mask.Count() 182 if maskCount < minNUMAsCountNeeded { 183 return 184 } else if qosutil.AnnotationsIndicateNUMABinding(reqAnnotations) && 185 !qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) && 186 maskCount > 1 { 187 // because it's hard to control memory allocation accurately, 188 // we only support numa_binding but not exclusive container with request smaller than 1 NUMA 189 return 190 } 191 192 maskBits := mask.GetBits() 193 numaCountNeeded := mask.Count() 194 195 allAvailableCPUsInMask := machine.NewCPUSet() 196 for _, nodeID := range maskBits { 197 if machineState[nodeID] == nil { 198 general.Warningf("NUMA: %d has nil state", nodeID) 199 return 200 } else if qosutil.AnnotationsIndicateNUMAExclusive(reqAnnotations) && machineState[nodeID].AllocatedCPUSet.Size() > 0 { 201 general.Warningf("numa_exclusive container skip mask: %s with NUMA: %d allocated: %d", 202 mask.String(), nodeID, machineState[nodeID].AllocatedCPUSet.Size()) 203 return 204 } 205 206 allAvailableCPUsInMask = allAvailableCPUsInMask.Union(machineState[nodeID].GetAvailableCPUSet(p.reservedCPUs)) 207 } 208 209 if allAvailableCPUsInMask.Size() < reqInt { 210 general.InfofV(4, "available cpuset: %s of size: %d excluding NUMA binding pods which is smaller than request: %d", 211 allAvailableCPUsInMask.String(), allAvailableCPUsInMask.Size(), reqInt) 212 return 213 } 214 215 crossSockets, err := machine.CheckNUMACrossSockets(maskBits, p.machineInfo.CPUTopology) 216 if err != nil { 217 general.Errorf("CheckNUMACrossSockets failed with error: %v", err) 218 return 219 } else if numaCountNeeded <= numaPerSocket && crossSockets { 220 general.InfofV(4, "needed: %d; min-needed: %d; NUMAs: %v cross sockets with numaPerSocket: %d", 221 numaCountNeeded, minNUMAsCountNeeded, maskBits, numaPerSocket) 222 return 223 } 224 225 hints[string(v1.ResourceCPU)].Hints = append(hints[string(v1.ResourceCPU)].Hints, &pluginapi.TopologyHint{ 226 Nodes: machine.MaskToUInt64Array(mask), 227 Preferred: len(maskBits) == minNUMAsCountNeeded, 228 }) 229 }) 230 231 return hints, nil 232 }