github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package state 18 19 import ( 20 "encoding/json" 21 "fmt" 22 23 "k8s.io/apimachinery/pkg/util/sets" 24 "k8s.io/klog/v2" 25 pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" 26 27 "github.com/kubewharf/katalyst-api/pkg/consts" 28 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" 29 "github.com/kubewharf/katalyst-core/pkg/util/general" 30 "github.com/kubewharf/katalyst-core/pkg/util/machine" 31 ) 32 33 // to compatible with checkpoint checksum calculation, 34 // we should make guarantees below in checkpoint properties assignment 35 // 1. resource.Quantity use resource.MustParse("0") to initialize, not to use resource.Quantity{} 36 // 2. CPUSet use NewCPUSet(...) to initialize, not to use CPUSet{} 37 // 3. not use omitempty in map property and must make new map to do initialization 38 39 type AllocationInfo struct { 40 PodUid string `json:"pod_uid,omitempty"` 41 PodNamespace string `json:"pod_namespace,omitempty"` 42 PodName string `json:"pod_name,omitempty"` 43 ContainerName string `json:"container_name,omitempty"` 44 ContainerType string `json:"container_type,omitempty"` 45 ContainerIndex uint64 `json:"container_index,omitempty"` 46 RampUp bool `json:"ramp_up,omitempty"` 47 OwnerPoolName string `json:"owner_pool_name,omitempty"` 48 PodRole string `json:"pod_role,omitempty"` 49 PodType string `json:"pod_type,omitempty"` 50 AllocationResult machine.CPUSet `json:"allocation_result,omitempty"` 51 OriginalAllocationResult machine.CPUSet `json:"original_allocation_result,omitempty"` 52 53 // key by numa node id, value is assignment for the pod in corresponding NUMA node 54 TopologyAwareAssignments map[int]machine.CPUSet `json:"topology_aware_assignments"` 55 // key by numa node id, value is assignment for the pod in corresponding NUMA node 56 OriginalTopologyAwareAssignments map[int]machine.CPUSet `json:"original_topology_aware_assignments"` 57 // for ramp up calculation. notice we don't use time.Time type here to avid checksum corruption. 58 InitTimestamp string `json:"init_timestamp"` 59 60 Labels map[string]string `json:"labels"` 61 Annotations map[string]string `json:"annotations"` 62 QoSLevel string `json:"qosLevel"` 63 RequestQuantity float64 `json:"request_quantity,omitempty"` 64 } 65 66 type ( 67 ContainerEntries map[string]*AllocationInfo // Keyed by containerName. 68 PodEntries map[string]ContainerEntries // Keyed by podUID. 69 ) 70 71 type NUMANodeState struct { 72 // equals to allocatable cpuset subtracting original allocation result of dedicated_cores with NUMA binding 73 DefaultCPUSet machine.CPUSet `json:"default_cpuset,omitempty"` 74 // equals to original allocation result of dedicated_cores with NUMA binding 75 AllocatedCPUSet machine.CPUSet `json:"allocated_cpuset,omitempty"` 76 77 PodEntries PodEntries `json:"pod_entries"` 78 } 79 80 type NUMANodeMap map[int]*NUMANodeState // keyed by numa node id 81 82 func (ai *AllocationInfo) Clone() *AllocationInfo { 83 if ai == nil { 84 return nil 85 } 86 87 clone := &AllocationInfo{ 88 PodUid: ai.PodUid, 89 PodNamespace: ai.PodNamespace, 90 PodName: ai.PodName, 91 ContainerName: ai.ContainerName, 92 ContainerType: ai.ContainerType, 93 ContainerIndex: ai.ContainerIndex, 94 RampUp: ai.RampUp, 95 OwnerPoolName: ai.OwnerPoolName, 96 PodRole: ai.PodRole, 97 PodType: ai.PodType, 98 AllocationResult: ai.AllocationResult.Clone(), 99 OriginalAllocationResult: ai.OriginalAllocationResult.Clone(), 100 InitTimestamp: ai.InitTimestamp, 101 QoSLevel: ai.QoSLevel, 102 Labels: general.DeepCopyMap(ai.Labels), 103 Annotations: general.DeepCopyMap(ai.Annotations), 104 RequestQuantity: ai.RequestQuantity, 105 } 106 107 if ai.TopologyAwareAssignments != nil { 108 clone.TopologyAwareAssignments = make(map[int]machine.CPUSet) 109 110 for node, cpus := range ai.TopologyAwareAssignments { 111 clone.TopologyAwareAssignments[node] = cpus.Clone() 112 } 113 } 114 115 if ai.OriginalTopologyAwareAssignments != nil { 116 clone.OriginalTopologyAwareAssignments = make(map[int]machine.CPUSet) 117 118 for node, cpus := range ai.OriginalTopologyAwareAssignments { 119 clone.OriginalTopologyAwareAssignments[node] = cpus.Clone() 120 } 121 } 122 123 return clone 124 } 125 126 func (ai *AllocationInfo) String() string { 127 if ai == nil { 128 return "" 129 } 130 131 contentBytes, err := json.Marshal(ai) 132 if err != nil { 133 klog.Errorf("[AllocationInfo.String] marshal AllocationInfo failed with error: %v", err) 134 return "" 135 } 136 return string(contentBytes) 137 } 138 139 // GetPoolName parses the owner pool name for AllocationInfo 140 // if owner exists, just return; otherwise, parse from qos-level 141 func (ai *AllocationInfo) GetPoolName() string { 142 if ai == nil { 143 return cpuadvisor.EmptyOwnerPoolName 144 } 145 146 if ownerPoolName := ai.GetOwnerPoolName(); ownerPoolName != cpuadvisor.EmptyOwnerPoolName { 147 return ownerPoolName 148 } 149 return ai.GetSpecifiedPoolName() 150 } 151 152 // GetOwnerPoolName parses the owner pool name for AllocationInfo 153 func (ai *AllocationInfo) GetOwnerPoolName() string { 154 if ai == nil { 155 return cpuadvisor.EmptyOwnerPoolName 156 } 157 return ai.OwnerPoolName 158 } 159 160 // GetSpecifiedPoolName parses the owner pool name for AllocationInfo from qos-level 161 func (ai *AllocationInfo) GetSpecifiedPoolName() string { 162 if ai == nil { 163 return cpuadvisor.EmptyOwnerPoolName 164 } 165 166 return GetSpecifiedPoolName(ai.QoSLevel, ai.Annotations[consts.PodAnnotationCPUEnhancementCPUSet]) 167 } 168 169 // CheckMainContainer returns true if the AllocationInfo is for main container 170 func (ai *AllocationInfo) CheckMainContainer() bool { 171 return ai.ContainerType == pluginapi.ContainerType_MAIN.String() 172 } 173 174 // CheckSideCar returns true if the AllocationInfo is for side-car container 175 func (ai *AllocationInfo) CheckSideCar() bool { 176 return ai.ContainerType == pluginapi.ContainerType_SIDECAR.String() 177 } 178 179 // CheckDedicated returns true if the AllocationInfo is for pod with dedicated-qos 180 func CheckDedicated(ai *AllocationInfo) bool { 181 return ai.QoSLevel == consts.PodAnnotationQoSLevelDedicatedCores 182 } 183 184 // CheckShared returns true if the AllocationInfo is for pod with shared-qos 185 func CheckShared(ai *AllocationInfo) bool { 186 return ai.QoSLevel == consts.PodAnnotationQoSLevelSharedCores 187 } 188 189 // CheckReclaimed returns true if the AllocationInfo is for pod with reclaimed-qos 190 func CheckReclaimed(ai *AllocationInfo) bool { 191 return ai.QoSLevel == consts.PodAnnotationQoSLevelReclaimedCores 192 } 193 194 // CheckNUMABinding returns true if the AllocationInfo is for pod with numa-binding enhancement 195 func CheckNUMABinding(ai *AllocationInfo) bool { 196 return ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable 197 } 198 199 // CheckDedicatedNUMABinding returns true if the AllocationInfo is for pod with 200 // dedicated-qos and numa-binding enhancement 201 func CheckDedicatedNUMABinding(ai *AllocationInfo) bool { 202 return CheckDedicated(ai) && CheckNUMABinding(ai) 203 } 204 205 // CheckDedicatedPool returns true if the AllocationInfo is for a container in the dedicated pool 206 func CheckDedicatedPool(ai *AllocationInfo) bool { 207 return ai.OwnerPoolName == PoolNameDedicated 208 } 209 210 // IsPoolEntry returns true if this entry is for a pool; 211 // otherwise, this entry is for a container entity. 212 func (ce ContainerEntries) IsPoolEntry() bool { 213 return len(ce) == 1 && ce[cpuadvisor.FakedContainerName] != nil 214 } 215 216 func (ce ContainerEntries) GetPoolEntry() *AllocationInfo { 217 if !ce.IsPoolEntry() { 218 return nil 219 } 220 return ce[cpuadvisor.FakedContainerName] 221 } 222 223 // GetMainContainerEntry returns the main container entry in pod container entries 224 func (ce ContainerEntries) GetMainContainerEntry() *AllocationInfo { 225 var mainContainerEntry *AllocationInfo 226 227 for _, siblingEntry := range ce { 228 if siblingEntry != nil && siblingEntry.CheckMainContainer() { 229 mainContainerEntry = siblingEntry 230 break 231 } 232 } 233 234 return mainContainerEntry 235 } 236 237 // GetMainContainerPoolName returns the main container owner pool name in pod container entries 238 func (ce ContainerEntries) GetMainContainerPoolName() string { 239 return ce.GetMainContainerEntry().GetOwnerPoolName() 240 } 241 242 func (pe PodEntries) Clone() PodEntries { 243 if pe == nil { 244 return nil 245 } 246 247 clone := make(PodEntries) 248 for podUID, containerEntries := range pe { 249 if containerEntries == nil { 250 continue 251 } 252 253 clone[podUID] = make(ContainerEntries) 254 for containerName, allocationInfo := range containerEntries { 255 clone[podUID][containerName] = allocationInfo.Clone() 256 } 257 } 258 return clone 259 } 260 261 func (pe PodEntries) String() string { 262 if pe == nil { 263 return "" 264 } 265 266 contentBytes, err := json.Marshal(pe) 267 if err != nil { 268 klog.Errorf("[PodEntries.String] marshal PodEntries failed with error: %v", err) 269 return "" 270 } 271 return string(contentBytes) 272 } 273 274 // CheckPoolEmpty returns true if the given pool doesn't exist 275 func (pe PodEntries) CheckPoolEmpty(poolName string) bool { 276 return pe[poolName][cpuadvisor.FakedContainerName] == nil || 277 pe[poolName][cpuadvisor.FakedContainerName].AllocationResult.IsEmpty() 278 } 279 280 // GetCPUSetForPool returns cpuset that belongs to the given pool 281 func (pe PodEntries) GetCPUSetForPool(poolName string) (machine.CPUSet, error) { 282 if pe == nil { 283 return machine.NewCPUSet(), fmt.Errorf("GetCPUSetForPool from nil podEntries") 284 } 285 286 if !pe[poolName].IsPoolEntry() { 287 return machine.NewCPUSet(), fmt.Errorf("pool not found") 288 } 289 return pe[poolName][cpuadvisor.FakedContainerName].AllocationResult.Clone(), nil 290 } 291 292 // GetFilteredPoolsCPUSet returns a mapping of pools for all of them (except for those skipped ones) 293 func (pe PodEntries) GetFilteredPoolsCPUSet(ignorePools sets.String) machine.CPUSet { 294 ret := machine.NewCPUSet() 295 if pe == nil { 296 return ret 297 } 298 299 for poolName, entries := range pe { 300 allocationInfo := entries.GetPoolEntry() 301 if allocationInfo != nil && !ignorePools.Has(poolName) { 302 ret = ret.Union(allocationInfo.AllocationResult.Clone()) 303 } 304 } 305 return ret 306 } 307 308 // GetFilteredPoolsCPUSetMap returns a mapping of pools for all of them (except for those skipped ones) 309 func (pe PodEntries) GetFilteredPoolsCPUSetMap(ignorePools sets.String) map[string]machine.CPUSet { 310 ret := make(map[string]machine.CPUSet) 311 if pe == nil { 312 return ret 313 } 314 315 for poolName, entries := range pe { 316 allocationInfo := entries.GetPoolEntry() 317 if allocationInfo != nil && !ignorePools.Has(poolName) { 318 ret[poolName] = allocationInfo.AllocationResult.Clone() 319 } 320 } 321 return ret 322 } 323 324 // GetFilteredPodEntries filter out PodEntries according to the given filter logic 325 func (pe PodEntries) GetFilteredPodEntries(filter func(ai *AllocationInfo) bool) PodEntries { 326 numaBindingEntries := make(PodEntries) 327 for podUID, containerEntries := range pe { 328 if containerEntries.IsPoolEntry() { 329 continue 330 } 331 332 for containerName, allocationInfo := range containerEntries { 333 if allocationInfo != nil && filter(allocationInfo) { 334 if numaBindingEntries[podUID] == nil { 335 numaBindingEntries[podUID] = make(ContainerEntries) 336 } 337 numaBindingEntries[podUID][containerName] = allocationInfo.Clone() 338 } 339 } 340 } 341 return numaBindingEntries 342 } 343 344 func (ns *NUMANodeState) Clone() *NUMANodeState { 345 if ns == nil { 346 return nil 347 } 348 return &NUMANodeState{ 349 DefaultCPUSet: ns.DefaultCPUSet.Clone(), 350 AllocatedCPUSet: ns.AllocatedCPUSet.Clone(), 351 PodEntries: ns.PodEntries.Clone(), 352 } 353 } 354 355 // GetAvailableCPUSet returns available cpuset in this numa 356 func (ns *NUMANodeState) GetAvailableCPUSet(reservedCPUs machine.CPUSet) machine.CPUSet { 357 if ns == nil { 358 return machine.NewCPUSet() 359 } 360 return ns.DefaultCPUSet.Difference(reservedCPUs) 361 } 362 363 // GetFilteredDefaultCPUSet returns default cpuset in this numa, along with the filter functions 364 func (ns *NUMANodeState) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) machine.CPUSet { 365 if ns == nil { 366 return machine.NewCPUSet() 367 } 368 369 res := ns.DefaultCPUSet.Clone() 370 res = res.Union(ns.AllocatedCPUSet) 371 for _, containerEntries := range ns.PodEntries { 372 for _, allocationInfo := range containerEntries { 373 if excludeWholeNUMA != nil && excludeWholeNUMA(allocationInfo) { 374 return machine.NewCPUSet() 375 } else if excludeEntry != nil && excludeEntry(allocationInfo) { 376 res = res.Difference(allocationInfo.AllocationResult) 377 } 378 } 379 } 380 return res 381 } 382 383 // ExistMatchedAllocationInfo returns true if the stated predicate holds true for some pods of this numa else it returns false. 384 func (ns *NUMANodeState) ExistMatchedAllocationInfo(f func(ai *AllocationInfo) bool) bool { 385 for _, containerEntries := range ns.PodEntries { 386 for _, allocationInfo := range containerEntries { 387 if f(allocationInfo) { 388 return true 389 } 390 } 391 } 392 393 return false 394 } 395 396 func (ns *NUMANodeState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) { 397 if ns == nil { 398 return 399 } 400 401 if ns.PodEntries == nil { 402 ns.PodEntries = make(PodEntries) 403 } 404 if _, ok := ns.PodEntries[podUID]; !ok { 405 ns.PodEntries[podUID] = make(ContainerEntries) 406 } 407 408 ns.PodEntries[podUID][containerName] = allocationInfo.Clone() 409 } 410 411 // GetDefaultCPUSet returns default cpuset in this node 412 func (nm NUMANodeMap) GetDefaultCPUSet() machine.CPUSet { 413 res := machine.NewCPUSet() 414 for _, numaNodeState := range nm { 415 res = res.Union(numaNodeState.DefaultCPUSet) 416 } 417 return res 418 } 419 420 // GetAvailableCPUSet returns available cpuset in this node 421 func (nm NUMANodeMap) GetAvailableCPUSet(reservedCPUs machine.CPUSet) machine.CPUSet { 422 return nm.GetDefaultCPUSet().Difference(reservedCPUs) 423 } 424 425 // GetFilteredDefaultCPUSet returns default cpuset in this node, along with the filter functions 426 func (nm NUMANodeMap) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool) machine.CPUSet { 427 res := machine.NewCPUSet() 428 for _, numaNodeState := range nm { 429 res = res.Union(numaNodeState.GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA)) 430 } 431 return res 432 } 433 434 // GetFilteredAvailableCPUSet returns available cpuset in this node, along with the filter functions 435 func (nm NUMANodeMap) GetFilteredAvailableCPUSet(reservedCPUs machine.CPUSet, 436 excludeEntry, excludeWholeNUMA func(ai *AllocationInfo) bool, 437 ) machine.CPUSet { 438 return nm.GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA).Difference(reservedCPUs) 439 } 440 441 // GetFilteredNUMASet return numa set except the numa which are excluded by the predicate. 442 func (nm NUMANodeMap) GetFilteredNUMASet(excludeNUMAPredicate func(ai *AllocationInfo) bool) machine.CPUSet { 443 res := machine.NewCPUSet() 444 for numaID, numaNodeState := range nm { 445 if numaNodeState.ExistMatchedAllocationInfo(excludeNUMAPredicate) { 446 continue 447 } 448 res.Add(numaID) 449 } 450 return res 451 } 452 453 func (nm NUMANodeMap) Clone() NUMANodeMap { 454 if nm == nil { 455 return nil 456 } 457 458 clone := make(NUMANodeMap) 459 for node, ns := range nm { 460 clone[node] = ns.Clone() 461 } 462 return clone 463 } 464 465 func (nm NUMANodeMap) String() string { 466 if nm == nil { 467 return "" 468 } 469 470 contentBytes, err := json.Marshal(nm) 471 if err != nil { 472 klog.Errorf("[NUMANodeMap.String] marshal NUMANodeMap failed with error: %v", err) 473 return "" 474 } 475 return string(contentBytes) 476 } 477 478 // reader is used to get information from local states 479 type reader interface { 480 GetMachineState() NUMANodeMap 481 GetPodEntries() PodEntries 482 GetAllocationInfo(podUID string, containerName string) *AllocationInfo 483 } 484 485 // writer is used to store information into local states, 486 // and it also provides functionality to maintain the local files 487 type writer interface { 488 SetMachineState(numaNodeMap NUMANodeMap) 489 SetPodEntries(podEntries PodEntries) 490 SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) 491 492 Delete(podUID string, containerName string) 493 ClearState() 494 } 495 496 // State interface provides methods for tracking and setting pod assignments 497 type State interface { 498 reader 499 writer 500 } 501 502 // ReadonlyState interface only provides methods for tracking pod assignments 503 type ReadonlyState interface { 504 reader 505 }