github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/rootfs/rootfs_pressure.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package rootfs
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"sort"
    24  	"sync"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/client-go/tools/events"
    29  	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    30  	"k8s.io/kubernetes/pkg/kubelet/util/format"
    31  
    32  	pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
    33  	"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin"
    34  	"github.com/kubewharf/katalyst-core/pkg/client"
    35  	"github.com/kubewharf/katalyst-core/pkg/config"
    36  	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    37  	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
    38  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    39  	"github.com/kubewharf/katalyst-core/pkg/consts"
    40  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    41  	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper"
    42  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    43  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    44  	"github.com/kubewharf/katalyst-core/pkg/util/process"
    45  )
    46  
    47  const (
    48  	EvictionPluginNamePodRootfsPressure = "rootfs-pressure-eviction-plugin"
    49  	EvictionScopeSystemRootfs           = "SystemRootfs"
    50  	evictionConditionSystemRootfs       = "SystemRootfs"
    51  	metricsNameReclaimPriorityCount     = "rootfs_reclaimed_pod_usage_priority_count"
    52  )
    53  
// PodRootfsPressureEvictionPlugin is an eviction plugin driven by image
// filesystem pressure. ThresholdMet evaluates the free-space and free-inodes
// thresholds from the dynamic configuration, and GetTopEvictionPods ranks the
// active pods by their rootfs usage (bytes or inodes, depending on which
// threshold was met).
//
// Collaborators:
//   - dynamicConfig supplies the RootfsPressureEvictionConfiguration.
//   - metaServer supplies the MetricsFetcher used for node and pod metrics.
//   - qosConf is used to check whether a pod is a reclaimed-QoS pod.
//   - emitter is handed to the metric helpers and records the priority
//     eviction metric.
type PodRootfsPressureEvictionPlugin struct {
	*process.StopControl
	pluginName    string
	dynamicConfig *dynamic.DynamicAgentConfiguration
	metaServer    *metaserver.MetaServer
	qosConf       *generic.QoSConfiguration
	emitter       metrics.MetricEmitter

	// The embedded RWMutex guards the two flags below: ThresholdMet writes
	// them under the write lock and GetTopEvictionPods reads them under the
	// read lock.
	sync.RWMutex
	isMinimumFreeThresholdMet       bool
	isMinimumInodesFreeThresholdMet bool
}
    66  
    67  func NewPodRootfsPressureEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder,
    68  	metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration,
    69  ) plugin.EvictionPlugin {
    70  	return &PodRootfsPressureEvictionPlugin{
    71  		pluginName:    EvictionPluginNamePodRootfsPressure,
    72  		metaServer:    metaServer,
    73  		StopControl:   process.NewStopControl(time.Time{}),
    74  		dynamicConfig: conf.DynamicAgentConfiguration,
    75  		qosConf:       conf.GenericConfiguration.QoSConfiguration,
    76  		emitter:       emitter,
    77  	}
    78  }
    79  
    80  func (r *PodRootfsPressureEvictionPlugin) Name() string {
    81  	if r == nil {
    82  		return ""
    83  	}
    84  	return r.pluginName
    85  }
    86  
    87  func (r *PodRootfsPressureEvictionPlugin) Start() {
    88  	return
    89  }
    90  
    91  func (r *PodRootfsPressureEvictionPlugin) ThresholdMet(_ context.Context) (*pluginapi.ThresholdMetResponse, error) {
    92  	resp := &pluginapi.ThresholdMetResponse{
    93  		MetType:       pluginapi.ThresholdMetType_NOT_MET,
    94  		EvictionScope: EvictionScopeSystemRootfs,
    95  	}
    96  
    97  	rootfsEvictionConfig := r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration
    98  	if !rootfsEvictionConfig.EnableRootfsPressureEviction {
    99  		return resp, nil
   100  	}
   101  
   102  	isMinimumFreeThresholdMet := r.minimumFreeThresholdMet(rootfsEvictionConfig)
   103  	isMinimumInodesFreeThresholdMet := r.minimumInodesFreeThresholdMet(rootfsEvictionConfig)
   104  	r.Lock()
   105  	r.isMinimumFreeThresholdMet = isMinimumFreeThresholdMet
   106  	r.isMinimumInodesFreeThresholdMet = isMinimumInodesFreeThresholdMet
   107  	r.Unlock()
   108  
   109  	if isMinimumFreeThresholdMet || isMinimumInodesFreeThresholdMet {
   110  		return &pluginapi.ThresholdMetResponse{
   111  			MetType:       pluginapi.ThresholdMetType_HARD_MET,
   112  			EvictionScope: EvictionScopeSystemRootfs,
   113  			Condition: &pluginapi.Condition{
   114  				ConditionType: pluginapi.ConditionType_NODE_CONDITION,
   115  				Effects:       []string{string(v1.TaintEffectNoSchedule)},
   116  				ConditionName: evictionConditionSystemRootfs,
   117  				MetCondition:  true,
   118  			},
   119  		}, nil
   120  	}
   121  
   122  	return resp, nil
   123  }
   124  
   125  func (r *PodRootfsPressureEvictionPlugin) minimumFreeThresholdMet(rootfsEvictionConfig *eviction.RootfsPressureEvictionConfiguration) bool {
   126  	if rootfsEvictionConfig == nil || rootfsEvictionConfig.MinimumImageFsFreeThreshold == nil {
   127  		return false
   128  	}
   129  
   130  	imageFsFreeBytes, errAvailable := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsAvailable)
   131  	if errAvailable != nil {
   132  		general.Warningf("Failed to get MetricsImageFsAvailable: %q", errAvailable)
   133  		return false
   134  	}
   135  	imageFsCapacityBytes, errCapacity := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsCapacity)
   136  	if errCapacity != nil {
   137  		general.Warningf("Failed to get MetricsImageFsCapacity: %q", errCapacity)
   138  		return false
   139  	}
   140  
   141  	if rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold != nil && int64(imageFsCapacityBytes) < rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold.Value() {
   142  		general.Warningf("Ignore this node for MinimumImageFsDiskCapacityThreshold (size: %d, threshold: %d)", int64(imageFsCapacityBytes), rootfsEvictionConfig.MinimumImageFsDiskCapacityThreshold.Value())
   143  		return false
   144  	}
   145  
   146  	if rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity != nil {
   147  		// free <  rootfsEvictionConfig.MinimumFreeInBytesThreshold -> met
   148  		if int64(imageFsFreeBytes) < rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity.Value() {
   149  			general.Infof("ThresholdMet result, Reason: MinimumImageFsFreeInBytesThreshold (Available: %d, Threshold: %d)", int64(imageFsFreeBytes), rootfsEvictionConfig.MinimumImageFsFreeThreshold.Quantity.Value())
   150  			return true
   151  		}
   152  	} else {
   153  		// free/capacity < rootfsEvictionConfig.MinimumFreeRateThreshold -> met
   154  		if imageFsFreeBytes > imageFsCapacityBytes || imageFsCapacityBytes == 0 {
   155  			general.Warningf("Invalid system rootfs metrics: %d/%d", int64(imageFsFreeBytes), int64(imageFsCapacityBytes))
   156  			return false
   157  		}
   158  		ratio := imageFsFreeBytes / imageFsCapacityBytes
   159  		if ratio < float64(rootfsEvictionConfig.MinimumImageFsFreeThreshold.Percentage) {
   160  			general.Infof("ThresholdMet result, Reason: MinimumImageFsFreeRateThreshold (Rate: %04f, Threshold: %04f)", ratio, rootfsEvictionConfig.MinimumImageFsFreeThreshold.Percentage)
   161  			return true
   162  		}
   163  	}
   164  
   165  	return false
   166  }
   167  
   168  func (r *PodRootfsPressureEvictionPlugin) minimumInodesFreeThresholdMet(rootfsEvictionConfig *eviction.RootfsPressureEvictionConfiguration) bool {
   169  	if rootfsEvictionConfig == nil || rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold == nil {
   170  		return false
   171  	}
   172  
   173  	if rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity != nil {
   174  		systemInodesFree, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodesFree)
   175  		if err != nil {
   176  			general.Warningf("Failed to get MetricsImageFsInodesFree: %q", err)
   177  		} else {
   178  			if int64(systemInodesFree) < rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity.Value() {
   179  				general.Infof("ThresholdMet result, Reason: MinimumImageFsInodesFreeThreshold (Free: %d, Threshold: %d)", int64(systemInodesFree), rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Quantity.Value())
   180  				return true
   181  			}
   182  		}
   183  	} else {
   184  		systemInodesFree, errInodesFree := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodesFree)
   185  		systemInodes, errInodes := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodes)
   186  		switch {
   187  		case errInodesFree != nil:
   188  			general.Warningf("Failed to get MetricsImageFsInodesFree: %q", errInodesFree)
   189  		case errInodes != nil:
   190  			general.Warningf("Failed to get MetricsImageFsInodes: %q", errInodes)
   191  		case systemInodesFree > systemInodes || systemInodes == 0:
   192  			general.Warningf("Invalid system rootfs inodes metric: %d/%d", int64(systemInodesFree), int64(systemInodes))
   193  		default:
   194  			rate := systemInodesFree / systemInodes
   195  			if rate < float64(rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Percentage) {
   196  				general.Infof("ThresholdMet result, Reason: MinimumImageFsInodesFreeRateThreshold (Rate: %04f, Threshold: %04f)", rate, rootfsEvictionConfig.MinimumImageFsInodesFreeThreshold.Percentage)
   197  				return true
   198  			}
   199  		}
   200  	}
   201  
   202  	return false
   203  }
   204  
   205  func (r *PodRootfsPressureEvictionPlugin) GetTopEvictionPods(_ context.Context, request *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) {
   206  	if request == nil {
   207  		return nil, fmt.Errorf("GetTopEvictionPods got nil request")
   208  	}
   209  
   210  	if len(request.ActivePods) == 0 {
   211  		general.Warningf("GetTopEvictionPods got empty active pods list")
   212  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   213  	}
   214  
   215  	rootfsEvictionConfig := r.dynamicConfig.GetDynamicConfiguration().RootfsPressureEvictionConfiguration
   216  	if !rootfsEvictionConfig.EnableRootfsPressureEviction {
   217  		general.Warningf("GetTopEvictionPods RootfsPressureEviction is disabled")
   218  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   219  	}
   220  
   221  	r.RLock()
   222  	isMinimumFreeThresholdMet := r.isMinimumFreeThresholdMet
   223  	isMinimumInodesFreeThresholdMet := r.isMinimumInodesFreeThresholdMet
   224  	r.RUnlock()
   225  
   226  	var pods []*v1.Pod
   227  	var err error
   228  	if isMinimumFreeThresholdMet {
   229  		pods, err = r.getTopNPods(request.ActivePods, request.TopN, rootfsEvictionConfig.PodMinimumUsedThreshold, rootfsEvictionConfig.ReclaimedQoSPodUsedPriorityThreshold, r.getPodRootfsUsed)
   230  	} else if isMinimumInodesFreeThresholdMet {
   231  		pods, err = r.getTopNPods(request.ActivePods, request.TopN, rootfsEvictionConfig.PodMinimumInodesUsedThreshold, rootfsEvictionConfig.ReclaimedQoSPodInodesUsedPriorityThreshold, r.getPodRootfsInodesUsed)
   232  	}
   233  	if err != nil {
   234  		general.Warningf("GetTopEvictionPods get TopN pods failed: %q", err)
   235  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   236  	}
   237  
   238  	if len(pods) == 0 {
   239  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   240  	}
   241  
   242  	resp := &pluginapi.GetTopEvictionPodsResponse{
   243  		TargetPods: pods,
   244  	}
   245  	if gracePeriod := rootfsEvictionConfig.GracePeriod; gracePeriod > 0 {
   246  		resp.DeletionOptions = &pluginapi.DeletionOptions{
   247  			GracePeriodSeconds: gracePeriod,
   248  		}
   249  	}
   250  
   251  	return resp, nil
   252  }
   253  
   254  func (r *PodRootfsPressureEvictionPlugin) GetEvictPods(_ context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) {
   255  	if request == nil {
   256  		return nil, fmt.Errorf("GetEvictPods got nil request")
   257  	}
   258  
   259  	return &pluginapi.GetEvictPodsResponse{}, nil
   260  }
   261  
// podUsageItem is one eviction candidate together with the usage figures used
// to rank it.
//
//   - usage and capacity are the first and second values returned by the
//     getPodRootfsUsageFunc used for ranking.
//   - priority is true when reclaimedPodPriorityEvictionMet selected the pod
//     for priority eviction; such pods sort ahead of all others.
//   - pod is the candidate itself.
type podUsageItem struct {
	usage    int64
	capacity int64
	priority bool
	pod      *v1.Pod
}
   268  
   269  type podUsageList []podUsageItem
   270  
   271  func (l podUsageList) Less(i, j int) bool {
   272  	if l[i].priority && !l[j].priority {
   273  		return true
   274  	}
   275  	if !l[i].priority && l[j].priority {
   276  		return false
   277  	}
   278  	return l[i].usage > l[j].usage
   279  }
   280  
   281  func (l podUsageList) Swap(i, j int) {
   282  	l[i], l[j] = l[j], l[i]
   283  }
   284  
   285  func (l podUsageList) Len() int {
   286  	return len(l)
   287  }
   288  
   289  func (r *PodRootfsPressureEvictionPlugin) podMinimumUsageProtectionMet(usage int64, percentage float64, minUsedThreshold *evictionapi.ThresholdValue) bool {
   290  	if minUsedThreshold == nil {
   291  		return false
   292  	}
   293  	if minUsedThreshold.Quantity != nil {
   294  		return usage < minUsedThreshold.Quantity.Value()
   295  	} else {
   296  		return percentage < float64(minUsedThreshold.Percentage)
   297  	}
   298  }
   299  
   300  func (r *PodRootfsPressureEvictionPlugin) reclaimedPodPriorityEvictionMet(pod *v1.Pod, used int64, percentage float64, reclaimedPodPriorityUsedThreshold *evictionapi.ThresholdValue) bool {
   301  	if reclaimedPodPriorityUsedThreshold == nil {
   302  		return false
   303  	}
   304  	isReclaimedPod, err := r.qosConf.CheckReclaimedQoSForPod(pod)
   305  	if err != nil {
   306  		general.Warningf("isReclaimedPod: pod UID: %s, error: %q", pod.UID, err)
   307  		return false
   308  	}
   309  	if !isReclaimedPod {
   310  		return false
   311  	}
   312  	if reclaimedPodPriorityUsedThreshold.Quantity != nil {
   313  		return used > reclaimedPodPriorityUsedThreshold.Quantity.Value()
   314  	} else {
   315  		return percentage > float64(reclaimedPodPriorityUsedThreshold.Percentage)
   316  	}
   317  }
   318  
// getPodRootfsUsageFunc returns a pod's usage of a rootfs resource and the
// capacity that usage is measured against, in that order. getTopNPods divides
// the first value by the second to obtain a usage percentage.
type getPodRootfsUsageFunc func(pod *v1.Pod) (int64, int64, error)
   320  
   321  func (r *PodRootfsPressureEvictionPlugin) getTopNPods(pods []*v1.Pod, n uint64, minUsedThreshold, reclaimedPodPriorityUsedThreshold *evictionapi.ThresholdValue, getPodRootfsUsageFunc getPodRootfsUsageFunc) ([]*v1.Pod, error) {
   322  	var usageItemList podUsageList
   323  
   324  	for i := range pods {
   325  		usageItem := podUsageItem{
   326  			pod: pods[i],
   327  		}
   328  
   329  		used, capacity, err := getPodRootfsUsageFunc(pods[i])
   330  		if err != nil {
   331  			general.Warningf("Failed to get pod rootfs usage for %s: %q", pods[i].UID, err)
   332  		} else {
   333  			percentage := float64(used) / float64(capacity)
   334  			usageItem.usage = used
   335  			usageItem.capacity = capacity
   336  			usageItem.priority = r.reclaimedPodPriorityEvictionMet(pods[i], used, percentage, reclaimedPodPriorityUsedThreshold)
   337  
   338  			if !usageItem.priority {
   339  				if r.podMinimumUsageProtectionMet(used, percentage, minUsedThreshold) {
   340  					continue
   341  				}
   342  			}
   343  			usageItemList = append(usageItemList, usageItem)
   344  		}
   345  	}
   346  
   347  	if uint64(len(usageItemList)) > n {
   348  		sort.Sort(usageItemList)
   349  		usageItemList = usageItemList[:n]
   350  	}
   351  
   352  	var results []*v1.Pod
   353  	for _, item := range usageItemList {
   354  		general.Infof("Rootfs Eviction Request(Pod: %s, Used: %d, Capacity: %d, Priority: %v)", format.Pod(item.pod), item.usage, item.capacity, item.priority)
   355  		if item.priority {
   356  			_ = r.emitter.StoreInt64(metricsNameReclaimPriorityCount, 1, metrics.MetricTypeNameCount,
   357  				metrics.ConvertMapToTags(map[string]string{
   358  					"uid":       string(item.pod.UID),
   359  					"namespace": item.pod.Namespace,
   360  					"name":      item.pod.Name,
   361  					"used":      fmt.Sprintf("%d", item.usage),
   362  					"capacity":  fmt.Sprintf("%d", item.capacity),
   363  				})...)
   364  		}
   365  		results = append(results, item.pod)
   366  	}
   367  	return results, nil
   368  }
   369  
   370  func (r *PodRootfsPressureEvictionPlugin) getPodRootfsUsed(pod *v1.Pod) (int64, int64, error) {
   371  	podRootfsUsed, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricsContainerRootfsUsed, -1)
   372  	if err != nil {
   373  		return 0, 0, err
   374  	}
   375  
   376  	rootfsCapacity, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsCapacity)
   377  	if err != nil {
   378  		return 0, 0, err
   379  	}
   380  
   381  	if rootfsCapacity < 1 {
   382  		return 0, 0, errors.New("invalid rootfs capacity")
   383  	}
   384  
   385  	return int64(podRootfsUsed), int64(rootfsCapacity), nil
   386  }
   387  
   388  func (r *PodRootfsPressureEvictionPlugin) getPodRootfsInodesUsed(pod *v1.Pod) (int64, int64, error) {
   389  	podRootfsInodesUsed, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricsContainerRootfsInodesUsed, -1)
   390  	if err != nil {
   391  		return 0, 0, err
   392  	}
   393  
   394  	rootfsInodes, err := helper.GetNodeMetric(r.metaServer.MetricsFetcher, r.emitter, consts.MetricsImageFsInodes)
   395  	if err != nil {
   396  		return 0, 0, err
   397  	}
   398  	if rootfsInodes < 1 {
   399  		return 0, 0, errors.New("invalid rootfs inodes")
   400  	}
   401  
   402  	return int64(podRootfsInodesUsed), int64(rootfsInodes), nil
   403  }