github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_async_handler.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package dynamicpolicy 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 "k8s.io/apimachinery/pkg/util/sets" 26 27 "github.com/kubewharf/katalyst-api/pkg/consts" 28 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" 29 cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts" 30 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" 31 "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" 32 coreconfig "github.com/kubewharf/katalyst-core/pkg/config" 33 dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" 34 "github.com/kubewharf/katalyst-core/pkg/metaserver" 35 "github.com/kubewharf/katalyst-core/pkg/metrics" 36 cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" 37 cgroupcmutils "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager" 38 "github.com/kubewharf/katalyst-core/pkg/util/general" 39 "github.com/kubewharf/katalyst-core/pkg/util/machine" 40 ) 41 42 // checkCPUSet emit errors if the memory allocation falls into unexpected results 43 func (p *DynamicPolicy) checkCPUSet(_ *coreconfig.Configuration, 44 _ interface{}, 45 _ *dynamicconfig.DynamicAgentConfiguration, 46 _ metrics.MetricEmitter, 47 _ *metaserver.MetaServer, 48 ) { 49 general.Infof("exec checkCPUSet") 50 var ( 51 err error 52 invalidCPUSet = false 53 cpuSetOverlap = false 54 ) 55 56 defer func() { 57 if err != nil { 58 _ = general.UpdateHealthzStateByError(cpuconsts.CheckCPUSet, err) 59 } else if invalidCPUSet { 60 _ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateNotReady, "invalid cpuset exists") 61 } else if cpuSetOverlap { 62 _ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateNotReady, "cpuset overlap") 63 } else { 64 _ = general.UpdateHealthzState(cpuconsts.CheckCPUSet, general.HealthzCheckStateReady, "") 65 } 66 }() 67 68 podEntries := p.state.GetPodEntries() 69 actualCPUSets := make(map[string]map[string]machine.CPUSet) 70 for podUID, containerEntries := range podEntries { 71 if containerEntries.IsPoolEntry() { 72 continue 73 } 74 75 for containerName, allocationInfo := range containerEntries { 76 if allocationInfo == nil || !allocationInfo.CheckMainContainer() { 77 continue 78 } else if state.CheckShared(allocationInfo) && p.getContainerRequestedCores(allocationInfo) == 0 { 79 general.Warningf("skip cpuset checking for pod: %s/%s container: %s with zero cpu request", 80 allocationInfo.PodNamespace, allocationInfo.PodName, containerName) 81 continue 82 } 83 84 tags := metrics.ConvertMapToTags(map[string]string{ 85 "podNamespace": allocationInfo.PodNamespace, 86 "podName": allocationInfo.PodName, 87 "containerName": allocationInfo.ContainerName, 88 }) 89 var ( 90 containerId string 91 cpuSetStats *cgroupcm.CPUSetStats 92 ) 93 94 containerId, err = p.metaServer.GetContainerID(podUID, containerName) 95 if err != nil { 96 general.Errorf("get container id of pod: %s container: %s failed with error: %v", podUID, containerName, err) 97 continue 98 } 99 100 cpuSetStats, err = cgroupcmutils.GetCPUSetForContainer(podUID, containerId) 101 if err != nil { 102 general.Errorf("GetCPUSet of pod: %s container: name(%s), id(%s) failed with error: %v", 103 podUID, containerName, containerId, err) 104 _ = p.emitter.StoreInt64(util.MetricNameRealStateInvalid, 1, metrics.MetricTypeNameRaw, tags...) 105 continue 106 } 107 108 if actualCPUSets[podUID] == nil { 109 actualCPUSets[podUID] = make(map[string]machine.CPUSet) 110 } 111 actualCPUSets[podUID][containerName] = machine.MustParse(cpuSetStats.CPUs) 112 113 general.Infof("pod: %s/%s, container: %s, state CPUSet: %s, actual CPUSet: %s", 114 allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, 115 allocationInfo.AllocationResult.String(), actualCPUSets[podUID][containerName].String()) 116 117 // only do comparison for dedicated_cores with numa_biding to avoid effect of adjustment for shared_cores 118 if !state.CheckDedicated(allocationInfo) { 119 continue 120 } 121 122 if !actualCPUSets[podUID][containerName].Equals(allocationInfo.OriginalAllocationResult) { 123 invalidCPUSet = true 124 general.Errorf("pod: %s/%s, container: %s, cpuset invalid", 125 allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName) 126 _ = p.emitter.StoreInt64(util.MetricNameCPUSetInvalid, 1, metrics.MetricTypeNameRaw, tags...) 127 } 128 } 129 } 130 131 unionDedicatedCPUSet := machine.NewCPUSet() 132 unionSharedCPUSet := machine.NewCPUSet() 133 134 for podUID, containerEntries := range actualCPUSets { 135 for containerName, cset := range containerEntries { 136 allocationInfo := podEntries[podUID][containerName] 137 if allocationInfo == nil { 138 continue 139 } 140 141 switch allocationInfo.QoSLevel { 142 case consts.PodAnnotationQoSLevelDedicatedCores: 143 if !cpuSetOverlap && cset.Intersection(unionDedicatedCPUSet).Size() != 0 { 144 cpuSetOverlap = true 145 general.Errorf("pod: %s/%s, container: %s cpuset: %s overlaps with others", 146 allocationInfo.PodNamespace, allocationInfo.PodName, allocationInfo.ContainerName, cset.String()) 147 } 148 unionDedicatedCPUSet = unionDedicatedCPUSet.Union(cset) 149 case consts.PodAnnotationQoSLevelSharedCores: 150 unionSharedCPUSet = unionSharedCPUSet.Union(cset) 151 } 152 } 153 } 154 155 regionOverlap := unionSharedCPUSet.Intersection(unionDedicatedCPUSet).Size() != 0 156 if regionOverlap { 157 general.Errorf("shared_cores union cpuset: %s overlaps with dedicated_cores union cpuset: %s", 158 unionSharedCPUSet.String(), unionDedicatedCPUSet.String()) 159 } 160 161 if !cpuSetOverlap { 162 cpuSetOverlap = regionOverlap 163 } 164 if cpuSetOverlap { 165 general.Errorf("found cpuset overlap. actualCPUSets: %+v", actualCPUSets) 166 _ = p.emitter.StoreInt64(util.MetricNameCPUSetOverlap, 1, metrics.MetricTypeNameRaw) 167 } 168 169 general.Infof("finish checkCPUSet") 170 } 171 172 // clearResidualState is used to clean residual pods in local state 173 func (p *DynamicPolicy) clearResidualState(_ *coreconfig.Configuration, 174 _ interface{}, 175 _ *dynamicconfig.DynamicAgentConfiguration, 176 _ metrics.MetricEmitter, 177 _ *metaserver.MetaServer, 178 ) { 179 general.Infof("exec clearResidualState") 180 var ( 181 err error 182 podList []*v1.Pod 183 ) 184 residualSet := make(map[string]bool) 185 186 defer func() { 187 _ = general.UpdateHealthzStateByError(cpuconsts.ClearResidualState, err) 188 }() 189 190 if p.metaServer == nil { 191 general.Errorf("nil metaServer") 192 return 193 } 194 195 ctx := context.Background() 196 podList, err = p.metaServer.GetPodList(ctx, nil) 197 if err != nil { 198 general.Errorf("get pod list failed: %v", err) 199 return 200 } 201 202 podSet := sets.NewString() 203 for _, pod := range podList { 204 podSet.Insert(fmt.Sprintf("%v", pod.UID)) 205 } 206 207 p.Lock() 208 defer p.Unlock() 209 210 podEntries := p.state.GetPodEntries() 211 for podUID, containerEntries := range podEntries { 212 if containerEntries.IsPoolEntry() { 213 continue 214 } 215 216 if !podSet.Has(podUID) { 217 residualSet[podUID] = true 218 p.residualHitMap[podUID] += 1 219 general.Infof("found pod: %s with state but doesn't show up in pod watcher, hit count: %d", podUID, p.residualHitMap[podUID]) 220 } 221 } 222 223 podsToDelete := sets.NewString() 224 for podUID, hitCount := range p.residualHitMap { 225 if !residualSet[podUID] { 226 general.Infof("already found pod: %s in pod watcher or its state is cleared, delete it from residualHitMap", podUID) 227 delete(p.residualHitMap, podUID) 228 continue 229 } 230 231 if time.Duration(hitCount)*stateCheckPeriod >= maxResidualTime { 232 podsToDelete.Insert(podUID) 233 } 234 } 235 236 if podsToDelete.Len() > 0 { 237 for { 238 podUID, found := podsToDelete.PopAny() 239 if !found { 240 break 241 } 242 243 var rErr error 244 if p.enableCPUAdvisor { 245 _, rErr = p.advisorClient.RemovePod(ctx, &advisorsvc.RemovePodRequest{ 246 PodUid: podUID, 247 }) 248 } 249 if rErr != nil { 250 general.Errorf("remove residual pod: %s in sys advisor failed with error: %v, remain it in state", podUID, rErr) 251 continue 252 } 253 254 general.Infof("clear residual pod: %s in state", podUID) 255 delete(podEntries, podUID) 256 } 257 258 var updatedMachineState state.NUMANodeMap 259 updatedMachineState, err = generateMachineStateFromPodEntries(p.machineInfo.CPUTopology, podEntries) 260 if err != nil { 261 general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) 262 return 263 } 264 265 p.state.SetPodEntries(podEntries) 266 p.state.SetMachineState(updatedMachineState) 267 268 err = p.adjustAllocationEntries() 269 if err != nil { 270 general.ErrorS(err, "adjustAllocationEntries failed") 271 } 272 } 273 } 274 275 // syncCPUIdle is used to set cpu idle for reclaimed cores 276 func (p *DynamicPolicy) syncCPUIdle(_ *coreconfig.Configuration, 277 _ interface{}, 278 _ *dynamicconfig.DynamicAgentConfiguration, 279 _ metrics.MetricEmitter, 280 _ *metaserver.MetaServer, 281 ) { 282 general.Infof("exec syncCPUIdle") 283 var err error 284 defer func() { 285 _ = general.UpdateHealthzStateByError(cpuconsts.SyncCPUIdle, err) 286 }() 287 288 if !cgroupcm.IsCPUIdleSupported() { 289 general.Warningf("cpu idle isn't unsupported, skip syncing") 290 return 291 } 292 293 err = cgroupcmutils.ApplyCPUWithRelativePath(p.reclaimRelativeRootCgroupPath, &cgroupcm.CPUData{CpuIdlePtr: &p.enableCPUIdle}) 294 if err != nil { 295 general.Errorf("ApplyCPUWithRelativePath in %s with enableCPUIdle: %v in failed with error: %v", 296 p.reclaimRelativeRootCgroupPath, p.enableCPUIdle, err) 297 } 298 }