github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/network/staticpolicy/util.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package staticpolicy 18 19 import ( 20 "fmt" 21 "math/rand" 22 "time" 23 24 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 25 26 "github.com/kubewharf/katalyst-api/pkg/consts" 27 "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" 28 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network/state" 29 "github.com/kubewharf/katalyst-core/pkg/util/general" 30 "github.com/kubewharf/katalyst-core/pkg/util/machine" 31 ) 32 33 type ( 34 ReservationPolicy string 35 NICSelectionPoligy string 36 ) 37 38 const ( 39 FirstNIC ReservationPolicy = "first" 40 EvenDistribution ReservationPolicy = "even" 41 42 RandomOne NICSelectionPoligy = "random" 43 FirstOne NICSelectionPoligy = "first" 44 LastOne NICSelectionPoligy = "last" 45 ) 46 47 type NICFilter func(nics []machine.InterfaceInfo, req *pluginapi.ResourceRequest, agentCtx *agent.GenericContext) []machine.InterfaceInfo 48 49 // isReqAffinityRestricted returns true if allocated network interface must have affinity with allocated numa 50 func isReqAffinityRestricted(reqAnnotations map[string]string) bool { 51 return reqAnnotations[consts.PodAnnotationNetworkEnhancementAffinityRestricted] == 52 consts.PodAnnotationNetworkEnhancementAffinityRestrictedTrue 53 } 54 55 // isReqNamespaceRestricted returns true if allocated network interface must be bind to a certain namespace type 56 func isReqNamespaceRestricted(reqAnnotations map[string]string) bool { 57 return reqAnnotations[consts.PodAnnotationNetworkEnhancementNamespaceType] == 58 consts.PodAnnotationNetworkEnhancementNamespaceTypeHost || 59 reqAnnotations[consts.PodAnnotationNetworkEnhancementNamespaceType] == 60 consts.PodAnnotationNetworkEnhancementNamespaceTypeNotHost 61 } 62 63 // checkNICPreferenceOfReq returns true if allocate network interface matches up with the 64 // preference of requests, and it will return error if it breaks hard restrictions. 65 func checkNICPreferenceOfReq(nic machine.InterfaceInfo, reqAnnotations map[string]string) (bool, error) { 66 switch reqAnnotations[consts.PodAnnotationNetworkEnhancementNamespaceType] { 67 case consts.PodAnnotationNetworkEnhancementNamespaceTypeHost: 68 if nic.NSName == machine.DefaultNICNamespace { 69 return true, nil 70 } else { 71 return false, fmt.Errorf("checkNICPreferenceOfReq got invalid nic: %s with %s: %s, NSName: %s", 72 nic.Iface, consts.PodAnnotationNetworkEnhancementNamespaceType, 73 consts.PodAnnotationNetworkEnhancementNamespaceTypeHost, nic.NSName) 74 } 75 case consts.PodAnnotationNetworkEnhancementNamespaceTypeHostPrefer: 76 if nic.NSName == machine.DefaultNICNamespace { 77 return true, nil 78 } else { 79 return false, nil 80 } 81 case consts.PodAnnotationNetworkEnhancementNamespaceTypeNotHost: 82 if nic.NSName != machine.DefaultNICNamespace { 83 return true, nil 84 } else { 85 return false, fmt.Errorf("checkNICPreferenceOfReq got invalid nic: %s with %s: %s, NSName: %s", 86 nic.Iface, consts.PodAnnotationNetworkEnhancementNamespaceType, 87 consts.PodAnnotationNetworkEnhancementNamespaceTypeHost, nic.NSName) 88 } 89 case consts.PodAnnotationNetworkEnhancementNamespaceTypeNotHostPrefer: 90 if nic.NSName != machine.DefaultNICNamespace { 91 return true, nil 92 } else { 93 return false, nil 94 } 95 default: 96 // there is no preference, 97 // so any type will be preferred. 98 return true, nil 99 } 100 } 101 102 // filterAvailableNICsByReq walks through nicFilters to select the targeted network interfaces 103 func filterAvailableNICsByReq(nics []machine.InterfaceInfo, req *pluginapi.ResourceRequest, agentCtx *agent.GenericContext, nicFilters []NICFilter) ([]machine.InterfaceInfo, error) { 104 if req == nil { 105 return nil, fmt.Errorf("filterAvailableNICsByReq got nil req") 106 } else if agentCtx == nil { 107 return nil, fmt.Errorf("filterAvailableNICsByReq got nil agentCtx") 108 } 109 110 filteredNICs := nics 111 for _, nicFilter := range nicFilters { 112 filteredNICs = nicFilter(filteredNICs, req, agentCtx) 113 } 114 return filteredNICs, nil 115 } 116 117 func filterNICsByAvailability(nics []machine.InterfaceInfo, _ *pluginapi.ResourceRequest, _ *agent.GenericContext) []machine.InterfaceInfo { 118 filteredNICs := make([]machine.InterfaceInfo, 0, len(nics)) 119 for _, nic := range nics { 120 if !nic.Enable { 121 general.Warningf("nic: %s isn't enabled", nic.Iface) 122 continue 123 } else if nic.Addr == nil || (len(nic.Addr.IPV4) == 0 && len(nic.Addr.IPV6) == 0) { 124 general.Warningf("nic: %s doesn't have IP address", nic.Iface) 125 continue 126 } 127 128 filteredNICs = append(filteredNICs, nic) 129 } 130 131 if len(filteredNICs) == 0 { 132 general.InfoS("nic list returned by filterNICsByAvailability is empty") 133 } 134 135 return filteredNICs 136 } 137 138 func filterNICsByNamespaceType(nics []machine.InterfaceInfo, req *pluginapi.ResourceRequest, _ *agent.GenericContext) []machine.InterfaceInfo { 139 filteredNICs := make([]machine.InterfaceInfo, 0, len(nics)) 140 141 for _, nic := range nics { 142 filterOut := true 143 switch req.Annotations[consts.PodAnnotationNetworkEnhancementNamespaceType] { 144 case consts.PodAnnotationNetworkEnhancementNamespaceTypeHost: 145 if nic.NSName == machine.DefaultNICNamespace { 146 filteredNICs = append(filteredNICs, nic) 147 filterOut = false 148 } 149 case consts.PodAnnotationNetworkEnhancementNamespaceTypeNotHost: 150 if nic.NSName != machine.DefaultNICNamespace { 151 filteredNICs = append(filteredNICs, nic) 152 filterOut = false 153 } 154 default: 155 filteredNICs = append(filteredNICs, nic) 156 filterOut = false 157 } 158 159 if filterOut { 160 general.Infof("filter out nic: %s mismatching with enhancement %s: %s", 161 nic.Iface, consts.PodAnnotationNetworkEnhancementNamespaceType, consts.PodAnnotationNetworkEnhancementNamespaceTypeHost) 162 } 163 } 164 165 if len(filteredNICs) == 0 { 166 general.InfoS("nic list returned by filterNICsByNamespaceType is empty", 167 "podNamespace", req.PodNamespace, 168 "podName", req.PodName, 169 "containerName", req.ContainerName) 170 } 171 172 return filteredNICs 173 } 174 175 func filterNICsByHint(nics []machine.InterfaceInfo, req *pluginapi.ResourceRequest, agentCtx *agent.GenericContext) []machine.InterfaceInfo { 176 // means not to filter by hint (in topology hint calculation period) 177 if req.Hint == nil { 178 general.InfoS("req hint is nil, skip filterNICsByHint", 179 "podNamespace", req.PodNamespace, 180 "podName", req.PodName, 181 "containerName", req.ContainerName) 182 return nics 183 } 184 185 var exactlyMatchNIC *machine.InterfaceInfo 186 hintMatchedNICs := make([]machine.InterfaceInfo, 0, len(nics)) 187 188 hintNUMASet, err := machine.NewCPUSetUint64(req.Hint.Nodes...) 189 if err != nil { 190 general.Errorf("NewCPUSetUint64 failed with error: %v, filter out all nics", err) 191 return nil 192 } 193 194 for i, nic := range nics { 195 siblingNUMAs, err := machine.GetSiblingNUMAs(nic.NumaNode, agentCtx.CPUTopology) 196 if err != nil { 197 general.Errorf("get siblingNUMAs for nic: %s failed with error: %v, filter out it", nic.Iface, err) 198 continue 199 } 200 201 if siblingNUMAs.Equals(hintNUMASet) { 202 if exactlyMatchNIC == nil { 203 general.InfoS("add hint exactly matched nic", 204 "podNamespace", req.PodNamespace, 205 "podName", req.PodName, 206 "containerName", req.ContainerName, 207 "nic", nic.Iface, 208 "siblingNUMAs", siblingNUMAs.String(), 209 "hintNUMASet", hintNUMASet.String()) 210 exactlyMatchNIC = &nics[i] 211 } 212 } else if siblingNUMAs.IsSubsetOf(hintNUMASet) { // for pod affinity_restricted != true 213 general.InfoS("add hint matched nic", 214 "podNamespace", req.PodNamespace, 215 "podName", req.PodName, 216 "containerName", req.ContainerName, 217 "nic", nic.Iface, 218 "siblingNUMAs", siblingNUMAs.String(), 219 "hintNUMASet", hintNUMASet.String()) 220 hintMatchedNICs = append(hintMatchedNICs, nic) 221 } 222 } 223 224 if exactlyMatchNIC != nil { 225 return []machine.InterfaceInfo{*exactlyMatchNIC} 226 } else { 227 return hintMatchedNICs 228 } 229 } 230 231 func getRandomNICs(nics []machine.InterfaceInfo) machine.InterfaceInfo { 232 r := rand.New(rand.NewSource(time.Now().UnixNano())) 233 return nics[r.Intn(len(nics))] 234 } 235 236 func selectOneNIC(nics []machine.InterfaceInfo, policy NICSelectionPoligy) machine.InterfaceInfo { 237 if len(nics) == 0 { 238 general.Errorf("no NIC to select") 239 return machine.InterfaceInfo{} 240 } 241 242 switch policy { 243 case RandomOne: 244 return getRandomNICs(nics) 245 case FirstOne: 246 // since we only pass filtered nics, always picking the first or the last one actually indicates a kind of binpacking 247 return nics[0] 248 case LastOne: 249 return nics[len(nics)-1] 250 } 251 252 // use LastOne as default 253 return nics[len(nics)-1] 254 } 255 256 // packAllocationResponse fills pluginapi.ResourceAllocationResponse with information from AllocationInfo and pluginapi.ResourceRequest 257 func packAllocationResponse(req *pluginapi.ResourceRequest, allocationInfo *state.AllocationInfo, respHint *pluginapi.TopologyHint, resourceAllocationAnnotations map[string]string) (*pluginapi.ResourceAllocationResponse, error) { 258 if allocationInfo == nil { 259 return nil, fmt.Errorf("packAllocationResponse got nil allocationInfo") 260 } else if req == nil { 261 return nil, fmt.Errorf("packAllocationResponse got nil request") 262 } 263 264 return &pluginapi.ResourceAllocationResponse{ 265 PodUid: req.PodUid, 266 PodNamespace: req.PodNamespace, 267 PodName: req.PodName, 268 ContainerName: req.ContainerName, 269 ContainerType: req.ContainerType, 270 ContainerIndex: req.ContainerIndex, 271 PodRole: req.PodRole, 272 PodType: req.PodType, 273 ResourceName: req.ResourceName, 274 AllocationResult: &pluginapi.ResourceAllocation{ 275 ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ 276 string(consts.ResourceNetBandwidth): { 277 IsNodeResource: true, 278 IsScalarResource: true, // to avoid re-allocating 279 AllocatedQuantity: float64(allocationInfo.Egress), 280 AllocationResult: allocationInfo.NumaNodes.String(), 281 Annotations: resourceAllocationAnnotations, 282 ResourceHints: &pluginapi.ListOfTopologyHints{ 283 Hints: []*pluginapi.TopologyHint{ 284 respHint, 285 }, 286 }, 287 }, 288 }, 289 }, 290 Labels: general.DeepCopyMap(req.Labels), 291 Annotations: general.DeepCopyMap(req.Annotations), 292 }, nil 293 } 294 295 // getReservedBandwidth is used to spread total reserved bandwidth into per-nic level. 296 func getReservedBandwidth(nics []machine.InterfaceInfo, reservation uint32, policy ReservationPolicy) (map[string]uint32, error) { 297 nicCount := len(nics) 298 reservedBandwidth := make(map[string]uint32) 299 300 if nicCount == 0 { 301 return reservedBandwidth, nil 302 } 303 304 general.Infof("reservedBanwidth: %d, nicCount: %d, policy: %s, ", 305 reservation, nicCount, policy) 306 307 switch policy { 308 case FirstNIC: 309 reservedBandwidth[nics[0].Iface] = reservation 310 case EvenDistribution: 311 for _, iface := range nics { 312 reservedBandwidth[iface.Iface] = reservation / uint32(nicCount) 313 } 314 default: 315 return nil, fmt.Errorf("unsupported network bandwidth reservation policy: %s", policy) 316 } 317 318 return reservedBandwidth, nil 319 } 320 321 func getResourceIdentifier(ifaceNS, ifaceName string) string { 322 if len(ifaceNS) > 0 { 323 return fmt.Sprintf("%s-%s", ifaceNS, ifaceName) 324 } 325 326 return ifaceName 327 }