github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/rootfs/rootfs_pressure.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package rootfs 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "sort" 24 "sync" 25 "time" 26 27 v1 "k8s.io/api/core/v1" 28 "k8s.io/client-go/tools/events" 29 evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" 30 "k8s.io/kubernetes/pkg/kubelet/util/format" 31 32 pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" 33 "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin" 34 "github.com/kubewharf/katalyst-core/pkg/client" 35 "github.com/kubewharf/katalyst-core/pkg/config" 36 "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" 37 "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction" 38 "github.com/kubewharf/katalyst-core/pkg/config/generic" 39 "github.com/kubewharf/katalyst-core/pkg/consts" 40 "github.com/kubewharf/katalyst-core/pkg/metaserver" 41 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" 42 "github.com/kubewharf/katalyst-core/pkg/metrics" 43 "github.com/kubewharf/katalyst-core/pkg/util/general" 44 "github.com/kubewharf/katalyst-core/pkg/util/process" 45 ) 46 47 const ( 48 EvictionPluginNamePodRootfsPressure = "rootfs-pressure-eviction-plugin" 49 EvictionScopeSystemRootfs = "SystemRootfs" 50 evictionConditionSystemRootfs = "SystemRootfs" 51 metricsNameReclaimPriorityCount = "rootfs_reclaimed_pod_usage_priority_count" 52 ) 53 54 type PodRootfsPressureEvictionPlugin struct { 55 *process.StopControl 56 pluginName string 57 dynamicConfig *dynamic.DynamicAgentConfiguration 58 metaServer *metaserver.MetaServer 59 qosConf *generic.QoSConfiguration 60 emitter metrics.MetricEmitter 61 62 sync.RWMutex 63 isMinimumFreeThresholdMet bool 64 isMinimumInodesFreeThresholdMet bool 65 } 66 67 func NewPodRootfsPressureEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder, 68 metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration, 69 ) plugin.EvictionPlugin { 70 return &PodRootfsPressureEvictionPlugin{ 71 pluginName: EvictionPluginNamePodRootfsPressure, 72 metaServer: metaServer, 73 StopControl: process.NewStopControl(time.Time{}), 74 dynamicConfig: conf.DynamicAgentConfiguration, 75 qosConf: conf.GenericConfiguration.QoSConfiguration, 76 emitter: emitter, 77 } 78 } 79 80 func (r *PodRootfsPressureEvictionPlugin) Name() string { 81 if r == nil { 82 return "" 83 } 84 return r.pluginName 85 } 86 87 func (r *PodRootfsPressureEvictionPlugin) Start() { 88 return 89 } 90 91 func (r *PodRootfsPressureEvictionPlugin) ThresholdMet(_ context.Context) (*pluginapi.ThresholdMetResponse, error) { 92 resp := &pluginapi.ThresholdMetResponse{ 93 MetType: pluginapi.ThresholdMetType_NOT_MET, 94 EvictionScope: EvictionScopeSystemRootfs, 95 } 96 97 rootfsEvictionConfig := r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration 98 if !rootfsEvictionConfig.EnableRootfsPressureEviction { 99 return resp, nil 100 } 101 102 isMinimumFreeThresholdMet := r.minimumFreeThresholdMet(rootfsEvictionConfig) 103 isMinimumInodesFreeThresholdMet := r.minimumInodesFreeThresholdMet(rootfsEvictionConfig) 104 r.Lock() 105 r.isMinimumFreeThresholdMet = isMinimumFreeThresholdMet 106 r.isMinimumInodesFreeThresholdMet = isMinimumInodesFreeThresholdMet 107 r.Unlock() 108 109 if isMinimumFreeThresholdMet || isMinimumInodesFreeThresholdMet { 110 return &pluginapi.ThresholdMetResponse{ 111 MetType: pluginapi.ThresholdMetType_HARD_MET, 112 EvictionScope: EvictionScopeSystemRootfs, 113 Condition: &pluginapi.Condition{ 114 ConditionType: pluginapi.ConditionType_NODE_CONDITION, 115 Effects: []string{string(v1.TaintEffectNoSchedule)}, 116 ConditionName: evictionConditionSystemRootfs, 117 MetCondition: true, 118 }, 119 }, nil 120 } 121 122 return resp, nil 123 } 124 125 func (r *PodRootfsPressureEvictionPlugin) minimumFreeThresholdMet(rootfsEvictionConfig *eviction.RootfsPressureEvictionConfiguration) bool { 126 if rootfsEvictionConfig == nil || rootfsEvictionConfig.MinimumImageFsFreeThreshold == nil { 127 return false 128 } 129 130 imageFsFreeBytes, errAvailable := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsAvailable) 131 if errAvailable != nil { 132 general.Warningf("Failed to get MetricsImageFsAvailable: %q", errAvailable) 133 return false 134 } 135 imageFsCapacityBytes, errCapacity := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsCapacity) 136 if errCapacity != nil { 137 general.Warningf("Failed to get MetricsImageFsCapacity: %q", errCapacity) 138 return false 139 } 140 141 if rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold != nil && int64(imageFsCapacityBytes) < rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold.Value() { 142 general.Warningf("Ignore this node for MinimumImageFsDiskCapacityThreshold (size: %d, threshold: %d)", int64(imageFsCapacityBytes), rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold.Value()) 143 return false 144 } 145 146 if rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity != nil { 147 // free < rootfsEvictionConfig.MinimumFreeInBytesThreshold -> met 148 if int64(imageFsFreeBytes) < rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity.Value() { 149 general.Infof("ThresholdMet result, Reason: MinimumImageFsFreeInBytesThreshold (Available: %d, Threshold: %d)", int64(imageFsFreeBytes), rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity.Value()) 150 return true 151 } 152 } else { 153 // free/capacity < rootfsEvictionConfig.MinimumFreeRateThreshold -> met 154 if imageFsFreeBytes > imageFsCapacityBytes || imageFsCapacityBytes == 0 { 155 general.Warningf("Invalid system rootfs metrics: %d/%d", int64(imageFsFreeBytes), int64(imageFsCapacityBytes)) 156 return false 157 } 158 ratio := imageFsFreeBytes / imageFsCapacityBytes 159 if ratio < float64(rootfsEvictionConfig.MinimumImageFsFreeThreshold.Percentage) { 160 general.Infof("ThresholdMet result, Reason: MinimumImageFsFreeRateThreshold (Rate: %04f, Threshold: %04f)", ratio, rootfsEvictionConfig.MinimumImageFsFreeThreshold.Percentage) 161 return true 162 } 163 } 164 165 return false 166 } 167 168 func (r *PodRootfsPressureEvictionPlugin) minimumInodesFreeThresholdMet(rootfsEvictionConfig *eviction.RootfsPressureEvictionConfiguration) bool { 169 if rootfsEvictionConfig == nil || rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold == nil { 170 return false 171 } 172 173 if rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity != nil { 174 systemInodesFree, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodesFree) 175 if err != nil { 176 general.Warningf("Failed to get MetricsImageFsInodesFree: %q", err) 177 } else { 178 if int64(systemInodesFree) < rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity.Value() { 179 general.Infof("ThresholdMet result, Reason: MinimumImageFsInodesFreeThreshold (Free: %d, Threshold: %d)", int64(systemInodesFree), rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity.Value()) 180 return true 181 } 182 } 183 } else { 184 systemInodesFree, errInodesFree := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodesFree) 185 systemInodes, errInodes := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodes) 186 switch { 187 case errInodesFree != nil: 188 general.Warningf("Failed to get MetricsImageFsInodesFree: %q", errInodesFree) 189 case errInodes != nil: 190 general.Warningf("Failed to get MetricsImageFsInodes: %q", errInodes) 191 case systemInodesFree > systemInodes || systemInodes == 0: 192 general.Warningf("Invalid system rootfs inodes metric: %d/%d", int64(systemInodesFree), int64(systemInodes)) 193 default: 194 rate := systemInodesFree / systemInodes 195 if rate < float64(rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Percentage) { 196 general.Infof("ThresholdMet result, Reason: MinimumImageFsInodesFreeRateThreshold (Rate: %04f, Threshold: %04f)", rate, rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Percentage) 197 return true 198 } 199 } 200 } 201 202 return false 203 } 204 205 func (r *PodRootfsPressureEvictionPlugin) GetTopEvictionPods(_ context.Context, request *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) { 206 if request == nil { 207 return nil, fmt.Errorf("GetTopEvictionPods got nil request") 208 } 209 210 if len(request.ActivePods) == 0 { 211 general.Warningf("GetTopEvictionPods got empty active pods list") 212 return &pluginapi.GetTopEvictionPodsResponse{}, nil 213 } 214 215 rootfsEvictionConfig := r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration 216 if !rootfsEvictionConfig.EnableRootfsPressureEviction { 217 general.Warningf("GetTopEvictionPods RootfsPressureEviction is disabled") 218 return &pluginapi.GetTopEvictionPodsResponse{}, nil 219 } 220 221 r.RLock() 222 isMinimumFreeThresholdMet := r.isMinimumFreeThresholdMet 223 isMinimumInodesFreeThresholdMet := r.isMinimumInodesFreeThresholdMet 224 r.RUnlock() 225 226 var pods []*v1.Pod 227 var err error 228 if isMinimumFreeThresholdMet { 229 pods, err = r.getTopNPods(request.ActivePods, request.TopN, rootfsEvictionConfig.PodMinimumUsedThreshold, rootfsEvictionConfig.ReclaimedQoSPodUsedPriorityThreshold, r.getPodRootfsUsed) 230 } else if isMinimumInodesFreeThresholdMet { 231 pods, err = r.getTopNPods(request.ActivePods, request.TopN, rootfsEvictionConfig.PodMinimumInodesUsedThreshold, rootfsEvictionConfig.ReclaimedQoSPodInodesUsedPriorityThreshold, r.getPodRootfsInodesUsed) 232 } 233 if err != nil { 234 general.Warningf("GetTopEvictionPods get TopN pods failed: %q", err) 235 return &pluginapi.GetTopEvictionPodsResponse{}, nil 236 } 237 238 if len(pods) == 0 { 239 return &pluginapi.GetTopEvictionPodsResponse{}, nil 240 } 241 242 resp := &pluginapi.GetTopEvictionPodsResponse{ 243 TargetPods: pods, 244 } 245 if gracePeriod := rootfsEvictionConfig.GracePeriod; gracePeriod > 0 { 246 resp.DeletionOptions = &pluginapi.DeletionOptions{ 247 GracePeriodSeconds: gracePeriod, 248 } 249 } 250 251 return resp, nil 252 } 253 254 func (r *PodRootfsPressureEvictionPlugin) GetEvictPods(_ context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) { 255 if request == nil { 256 return nil, fmt.Errorf("GetEvictPods got nil request") 257 } 258 259 return &pluginapi.GetEvictPodsResponse{}, nil 260 } 261 262 type podUsageItem struct { 263 usage int64 264 capacity int64 265 priority bool 266 pod *v1.Pod 267 } 268 269 type podUsageList []podUsageItem 270 271 func (l podUsageList) Less(i, j int) bool { 272 if l[i].priority && !l[j].priority { 273 return true 274 } 275 if !l[i].priority && l[j].priority { 276 return false 277 } 278 return l[i].usage > l[j].usage 279 } 280 281 func (l podUsageList) Swap(i, j int) { 282 l[i], l[j] = l[j], l[i] 283 } 284 285 func (l podUsageList) Len() int { 286 return len(l) 287 } 288 289 func (r *PodRootfsPressureEvictionPlugin) podMinimumUsageProtectionMet(usage int64, percentage float64, minUsedThreshold *evictionapi.ThresholdValue) bool { 290 if minUsedThreshold == nil { 291 return false 292 } 293 if minUsedThreshold.Quantity != nil { 294 return usage < minUsedThreshold.Quantity.Value() 295 } else { 296 return percentage < float64(minUsedThreshold.Percentage) 297 } 298 } 299 300 func (r *PodRootfsPressureEvictionPlugin) reclaimedPodPriorityEvictionMet(pod *v1.Pod, used int64, percentage float64, reclaimedPodPriorityUsedThreshold *evictionapi.ThresholdValue) bool { 301 if reclaimedPodPriorityUsedThreshold == nil { 302 return false 303 } 304 isReclaimedPod, err := r.qosConf.CheckReclaimedQoSForPod(pod) 305 if err != nil { 306 general.Warningf("isReclaimedPod: pod UID: %s, error: %q", pod.UID, err) 307 return false 308 } 309 if !isReclaimedPod { 310 return false 311 } 312 if reclaimedPodPriorityUsedThreshold.Quantity != nil { 313 return used > reclaimedPodPriorityUsedThreshold.Quantity.Value() 314 } else { 315 return percentage > float64(reclaimedPodPriorityUsedThreshold.Percentage) 316 } 317 } 318 319 type getPodRootfsUsageFunc func(pod *v1.Pod) (int64, int64, error) 320 321 func (r *PodRootfsPressureEvictionPlugin) getTopNPods(pods []*v1.Pod, n uint64, minUsedThreshold, reclaimedPodPriorityUsedThreshold *evictionapi.ThresholdValue, getPodRootfsUsageFunc getPodRootfsUsageFunc) ([]*v1.Pod, error) { 322 var usageItemList podUsageList 323 324 for i := range pods { 325 usageItem := podUsageItem{ 326 pod: pods[i], 327 } 328 329 used, capacity, err := getPodRootfsUsageFunc(pods[i]) 330 if err != nil { 331 general.Warningf("Failed to get pod rootfs usage for %s: %q", pods[i].UID, err) 332 } else { 333 percentage := float64(used) / float64(capacity) 334 usageItem.usage = used 335 usageItem.capacity = capacity 336 usageItem.priority = r.reclaimedPodPriorityEvictionMet(pods[i], used, percentage, reclaimedPodPriorityUsedThreshold) 337 338 if !usageItem.priority { 339 if r.podMinimumUsageProtectionMet(used, percentage, minUsedThreshold) { 340 continue 341 } 342 } 343 usageItemList = append(usageItemList, usageItem) 344 } 345 } 346 347 if uint64(len(usageItemList)) > n { 348 sort.Sort(usageItemList) 349 usageItemList = usageItemList[:n] 350 } 351 352 var results []*v1.Pod 353 for _, item := range usageItemList { 354 general.Infof("Rootfs Eviction Request(Pod: %s, Used: %d, Capacity: %d, Priority: %v)", format.Pod(item.pod), item.usage, item.capacity, item.priority) 355 if item.priority { 356 _ = r.emitter.StoreInt64(metricsNameReclaimPriorityCount, 1, metrics.MetricTypeNameCount, 357 metrics.ConvertMapToTags(map[string]string{ 358 "uid": string(item.pod.UID), 359 "namespace": item.pod.Namespace, 360 "name": item.pod.Name, 361 "used": fmt.Sprintf("%d", item.usage), 362 "capacity": fmt.Sprintf("%d", item.capacity), 363 })...) 364 } 365 results = append(results, item.pod) 366 } 367 return results, nil 368 } 369 370 func (r *PodRootfsPressureEvictionPlugin) getPodRootfsUsed(pod *v1.Pod) (int64, int64, error) { 371 podRootfsUsed, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricsContainerRootfsUsed, -1) 372 if err != nil { 373 return 0, 0, err 374 } 375 376 rootfsCapacity, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsCapacity) 377 if err != nil { 378 return 0, 0, err 379 } 380 381 if rootfsCapacity < 1 { 382 return 0, 0, errors.New("invalid rootfs capacity") 383 } 384 385 return int64(podRootfsUsed), int64(rootfsCapacity), nil 386 } 387 388 func (r *PodRootfsPressureEvictionPlugin) getPodRootfsInodesUsed(pod *v1.Pod) (int64, int64, error) { 389 podRootfsInodesUsed, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricsContainerRootfsInodesUsed, -1) 390 if err != nil { 391 return 0, 0, err 392 } 393 394 rootfsInodes, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodes) 395 if err != nil { 396 return 0, 0, err 397 } 398 if rootfsInodes < 1 { 399 return 0, 0, errors.New("invalid rootfs inodes") 400 } 401 402 return int64(podRootfsInodesUsed), int64(rootfsInodes), nil 403 }