github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/memory/dynamicpolicy/state/state.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package state 18 19 import ( 20 "encoding/json" 21 "fmt" 22 23 info "github.com/google/cadvisor/info/v1" 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/klog/v2" 26 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 27 28 "github.com/kubewharf/katalyst-api/pkg/consts" 29 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" 30 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" 31 "github.com/kubewharf/katalyst-core/pkg/util/general" 32 "github.com/kubewharf/katalyst-core/pkg/util/machine" 33 ) 34 35 type AllocationInfo struct { 36 PodUid string `json:"pod_uid,omitempty"` 37 PodNamespace string `json:"pod_namespace,omitempty"` 38 PodName string `json:"pod_name,omitempty"` 39 ContainerName string `json:"container_name,omitempty"` 40 ContainerType string `json:"container_type,omitempty"` 41 ContainerIndex uint64 `json:"container_index,omitempty"` 42 RampUp bool `json:"ramp_up,omitempty"` 43 PodRole string `json:"pod_role,omitempty"` 44 PodType string `json:"pod_type,omitempty"` 45 AggregatedQuantity uint64 `json:"aggregated_quantity"` 46 NumaAllocationResult machine.CPUSet `json:"numa_allocation_result,omitempty"` 47 48 // keyed by numa node id, value is assignment for the pod in corresponding NUMA node 49 TopologyAwareAllocations map[int]uint64 `json:"topology_aware_allocations"` 50 51 // keyed by control knob names referred in memoryadvisor package 52 ExtraControlKnobInfo map[string]commonstate.ControlKnobInfo `json:"extra_control_knob_info"` 53 Labels map[string]string `json:"labels"` 54 Annotations map[string]string `json:"annotations"` 55 QoSLevel string `json:"qosLevel"` 56 } 57 58 type ( 59 ContainerEntries map[string]*AllocationInfo // Keyed by container name 60 PodEntries map[string]ContainerEntries // Keyed by pod UID 61 PodResourceEntries map[v1.ResourceName]PodEntries // Keyed by resource name 62 ) 63 64 // NUMANodeState records the amount of memory per numa node (in bytes) 65 type NUMANodeState struct { 66 TotalMemSize uint64 `json:"total"` 67 SystemReserved uint64 `json:"systemReserved"` 68 Allocatable uint64 `json:"allocatable"` 69 Allocated uint64 `json:"Allocated"` 70 Free uint64 `json:"free"` 71 PodEntries PodEntries `json:"pod_entries"` 72 } 73 74 type ( 75 NUMANodeMap map[int]*NUMANodeState // keyed by numa node id 76 NUMANodeResourcesMap map[v1.ResourceName]NUMANodeMap // keyed by resource name 77 ) 78 79 func (ai *AllocationInfo) String() string { 80 if ai == nil { 81 return "" 82 } 83 84 contentBytes, err := json.Marshal(ai) 85 if err != nil { 86 klog.Errorf("[AllocationInfo.String] marshal AllocationInfo failed with error: %v", err) 87 return "" 88 } 89 return string(contentBytes) 90 } 91 92 func (ai *AllocationInfo) Clone() *AllocationInfo { 93 if ai == nil { 94 return nil 95 } 96 97 clone := &AllocationInfo{ 98 PodUid: ai.PodUid, 99 PodNamespace: ai.PodNamespace, 100 PodName: ai.PodName, 101 ContainerName: ai.ContainerName, 102 ContainerType: ai.ContainerType, 103 ContainerIndex: ai.ContainerIndex, 104 RampUp: ai.RampUp, 105 PodRole: ai.PodRole, 106 PodType: ai.PodType, 107 AggregatedQuantity: ai.AggregatedQuantity, 108 NumaAllocationResult: ai.NumaAllocationResult.Clone(), 109 QoSLevel: ai.QoSLevel, 110 Labels: general.DeepCopyMap(ai.Labels), 111 Annotations: general.DeepCopyMap(ai.Annotations), 112 } 113 114 if ai.TopologyAwareAllocations != nil { 115 clone.TopologyAwareAllocations = make(map[int]uint64) 116 117 for node, quantity := range ai.TopologyAwareAllocations { 118 clone.TopologyAwareAllocations[node] = quantity 119 } 120 } 121 122 if ai.ExtraControlKnobInfo != nil { 123 clone.ExtraControlKnobInfo = make(map[string]commonstate.ControlKnobInfo) 124 125 for name := range ai.ExtraControlKnobInfo { 126 clone.ExtraControlKnobInfo[name] = ai.ExtraControlKnobInfo[name] 127 } 128 } 129 130 return clone 131 } 132 133 // CheckNumaBinding returns true if the AllocationInfo is for pod with 134 // dedicated-qos and numa-binding enhancement 135 func (ai *AllocationInfo) CheckNumaBinding() bool { 136 return ai.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores && 137 ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable 138 } 139 140 // CheckMainContainer returns true if the AllocationInfo is for main container 141 func (ai *AllocationInfo) CheckMainContainer() bool { 142 return ai.ContainerType == pluginapi.ContainerType_MAIN.String() 143 } 144 145 // CheckSideCar returns true if the AllocationInfo is for side-car container 146 func (ai *AllocationInfo) CheckSideCar() bool { 147 return ai.ContainerType == pluginapi.ContainerType_SIDECAR.String() 148 } 149 150 // GetResourceAllocation transforms resource allocation information into *pluginapi.ResourceAllocation 151 func (ai *AllocationInfo) GetResourceAllocation() (*pluginapi.ResourceAllocation, error) { 152 if ai == nil { 153 return nil, fmt.Errorf("GetResourceAllocation of nil AllocationInfo") 154 } 155 156 // deal with main resource 157 resourceAllocation := &pluginapi.ResourceAllocation{ 158 ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ 159 string(v1.ResourceMemory): { 160 OciPropertyName: util.OCIPropertyNameCPUSetMems, 161 IsNodeResource: false, 162 IsScalarResource: true, 163 AllocatedQuantity: float64(ai.AggregatedQuantity), 164 AllocationResult: ai.NumaAllocationResult.String(), 165 }, 166 }, 167 } 168 169 // deal with accompanying resources 170 for name, entry := range ai.ExtraControlKnobInfo { 171 if entry.OciPropertyName == "" { 172 continue 173 } 174 175 if resourceAllocation.ResourceAllocation[name] != nil { 176 return nil, fmt.Errorf("name: %s meets conflict", name) 177 } 178 179 resourceAllocation.ResourceAllocation[name] = &pluginapi.ResourceAllocationInfo{ 180 OciPropertyName: entry.OciPropertyName, 181 AllocationResult: entry.ControlKnobValue, 182 } 183 } 184 185 return resourceAllocation, nil 186 } 187 188 func (pe PodEntries) Clone() PodEntries { 189 if pe == nil { 190 return nil 191 } 192 193 clone := make(PodEntries) 194 for podUID, containerEntries := range pe { 195 if containerEntries == nil { 196 continue 197 } 198 199 clone[podUID] = make(ContainerEntries) 200 for containerName, allocationInfo := range containerEntries { 201 clone[podUID][containerName] = allocationInfo.Clone() 202 } 203 } 204 return clone 205 } 206 207 // GetMainContainerAllocation returns AllocationInfo that belongs 208 // the main container for this pod 209 func (pe PodEntries) GetMainContainerAllocation(podUID string) (*AllocationInfo, bool) { 210 for _, allocationInfo := range pe[podUID] { 211 if allocationInfo.CheckMainContainer() { 212 return allocationInfo, true 213 } 214 } 215 return nil, false 216 } 217 218 func (pre PodResourceEntries) String() string { 219 if pre == nil { 220 return "" 221 } 222 223 contentBytes, err := json.Marshal(pre) 224 if err != nil { 225 klog.Errorf("[PodResourceEntries.String] marshal PodResourceEntries failed with error: %v", err) 226 return "" 227 } 228 return string(contentBytes) 229 } 230 231 func (pre PodResourceEntries) Clone() PodResourceEntries { 232 if pre == nil { 233 return nil 234 } 235 236 clone := make(PodResourceEntries) 237 for resourceName, podEntries := range pre { 238 clone[resourceName] = podEntries.Clone() 239 } 240 return clone 241 } 242 243 func (ns *NUMANodeState) String() string { 244 if ns == nil { 245 return "" 246 } 247 248 contentBytes, err := json.Marshal(ns) 249 if err != nil { 250 klog.Errorf("[NUMANodeState.String] marshal NUMANodeState failed with error: %v", err) 251 return "" 252 } 253 return string(contentBytes) 254 } 255 256 func (ns *NUMANodeState) Clone() *NUMANodeState { 257 if ns == nil { 258 return nil 259 } 260 261 return &NUMANodeState{ 262 TotalMemSize: ns.TotalMemSize, 263 SystemReserved: ns.SystemReserved, 264 Allocatable: ns.Allocatable, 265 Allocated: ns.Allocated, 266 Free: ns.Free, 267 PodEntries: ns.PodEntries.Clone(), 268 } 269 } 270 271 // HasNUMABindingPods returns true if any AllocationInfo in this NUMANodeState is for numa-binding 272 func (ns *NUMANodeState) HasNUMABindingPods() bool { 273 if ns == nil { 274 return false 275 } 276 277 for _, containerEntries := range ns.PodEntries { 278 for _, allocationInfo := range containerEntries { 279 if allocationInfo != nil && allocationInfo.CheckNumaBinding() { 280 return true 281 } 282 } 283 } 284 return false 285 } 286 287 // SetAllocationInfo adds a new AllocationInfo (for pod/container pairs) into the given NUMANodeState 288 func (ns *NUMANodeState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) { 289 if ns == nil { 290 return 291 } 292 293 if ns.PodEntries == nil { 294 ns.PodEntries = make(PodEntries) 295 } 296 297 if _, ok := ns.PodEntries[podUID]; !ok { 298 ns.PodEntries[podUID] = make(ContainerEntries) 299 } 300 301 ns.PodEntries[podUID][containerName] = allocationInfo.Clone() 302 } 303 304 func (nm NUMANodeMap) Clone() NUMANodeMap { 305 clone := make(NUMANodeMap) 306 for node, ns := range nm { 307 clone[node] = ns.Clone() 308 } 309 return clone 310 } 311 312 // BytesPerNUMA is a helper function to parse memory capacity at per numa level 313 func (nm NUMANodeMap) BytesPerNUMA() (uint64, error) { 314 if len(nm) == 0 { 315 return 0, fmt.Errorf("getBytesPerNUMAFromMachineState got nil numaMap") 316 } 317 318 var maxNUMAAllocatable uint64 319 for _, numaState := range nm { 320 if numaState != nil { 321 maxNUMAAllocatable = general.MaxUInt64(maxNUMAAllocatable, numaState.Allocatable) 322 } 323 } 324 325 if maxNUMAAllocatable > 0 { 326 return maxNUMAAllocatable, nil 327 } 328 329 return 0, fmt.Errorf("getBytesPerNUMAFromMachineState doesn't get valid numaState") 330 } 331 332 // GetNUMANodesWithoutNUMABindingPods returns a set of numa nodes; for 333 // those numa nodes, they all don't contain numa-binding pods 334 func (nm NUMANodeMap) GetNUMANodesWithoutNUMABindingPods() machine.CPUSet { 335 res := machine.NewCPUSet() 336 for numaId, numaNodeState := range nm { 337 if numaNodeState != nil && !numaNodeState.HasNUMABindingPods() { 338 res = res.Union(machine.NewCPUSet(numaId)) 339 } 340 } 341 return res 342 } 343 344 func (nrm NUMANodeResourcesMap) String() string { 345 if nrm == nil { 346 return "" 347 } 348 349 contentBytes, err := json.Marshal(nrm) 350 if err != nil { 351 klog.Errorf("[NUMANodeResourcesMap.String] marshal NUMANodeResourcesMap failed with error: %v", err) 352 return "" 353 } 354 return string(contentBytes) 355 } 356 357 func (nrm NUMANodeResourcesMap) Clone() NUMANodeResourcesMap { 358 clone := make(NUMANodeResourcesMap) 359 for resourceName, nm := range nrm { 360 clone[resourceName] = nm.Clone() 361 } 362 return clone 363 } 364 365 // reader is used to get information from local states 366 type reader interface { 367 GetMachineState() NUMANodeResourcesMap 368 GetPodResourceEntries() PodResourceEntries 369 GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo 370 } 371 372 // writer is used to store information into local states, 373 // and it also provides functionality to maintain the local files 374 type writer interface { 375 SetMachineState(numaNodeResourcesMap NUMANodeResourcesMap) 376 SetPodResourceEntries(podResourceEntries PodResourceEntries) 377 SetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string, allocationInfo *AllocationInfo) 378 379 Delete(resourceName v1.ResourceName, podUID, containerName string) 380 ClearState() 381 } 382 383 // ReadonlyState interface only provides methods for tracking pod assignments 384 type ReadonlyState interface { 385 reader 386 387 GetMachineInfo() *info.MachineInfo 388 GetReservedMemory() map[v1.ResourceName]map[int]uint64 389 } 390 391 // State interface provides methods for tracking and setting pod assignments 392 type State interface { 393 writer 394 ReadonlyState 395 }