github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/memory/helper.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package memory
    18  
    19  import (
    20  	"strconv"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  
    24  	"github.com/kubewharf/katalyst-core/pkg/config"
    25  	evictionconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction"
    26  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    27  	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper"
    28  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    29  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    30  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    31  )
    32  
// eviction scope related variables
// actionReclaimedEviction for reclaimed_cores, while actionEviction for all pods
//
// The values are consecutive integers generated by iota, starting at 0 for
// actionNoop. filterPods selects no pods for any value other than
// actionReclaimedEviction and actionEviction.
const (
	actionNoop = iota
	actionReclaimedEviction
	actionEviction
)
    40  
// control-related variables
// NOTE(review): neither constant is referenced in this part of the file; judging
// by the names, both are -1 sentinels meaning "value not available" — confirm
// against the code that uses them.
const (
	kswapdStealPreviousCycleMissing = -1
	nonExistNumaID                  = -1
)
    46  
// metric names, tag keys and tag values shared within this package
const (
	// metric names; metricsNameFetchMetricError is the counter that
	// getEvictionCmpFuncs increments when a pod metric cannot be fetched
	metricsNameFetchMetricError   = "fetch_metric_error_count"
	metricsNameNumberOfTargetPods = "number_of_target_pods_raw"
	metricsNameThresholdMet       = "threshold_met_count"
	metricsNameNumaMetric         = "numa_metric_raw"
	metricsNameSystemMetric       = "system_metric_raw"

	// tag keys; getEvictionCmpFuncs fills metricsTagKeyNumaID with the NUMA id
	// rendered as a decimal string
	metricsTagKeyEvictionScope  = "eviction_scope"
	metricsTagKeyDetectionLevel = "detection_level"
	metricsTagKeyNumaID         = "numa_id"
	metricsTagKeyAction         = "action"
	metricsTagKeyMetricName     = "metric_name"

	// tag values; presumably paired with the detection_level, action and
	// metric_name keys above — verify against the emitting code
	metricsTagValueDetectionLevelNuma             = "numa"
	metricsTagValueDetectionLevelSystem           = "system"
	metricsTagValueActionReclaimedEviction        = "reclaimed_eviction"
	metricsTagValueActionEviction                 = "eviction"
	metricsTagValueNumaFreeBelowWatermarkTimes    = "numa_free_below_watermark_times"
	metricsTagValueSystemKswapdDiff               = "system_kswapd_diff"
	metricsTagValueSystemKswapdRateExceedDuration = "system_kswapd_rate_exceed_duration"
)
    68  
const (
	// errMsgCheckReclaimedPodFailed is a log format string; its arguments are the
	// pod namespace, the pod name and the error, in that order.
	errMsgCheckReclaimedPodFailed = "failed to check reclaimed pod, pod: %s/%s, err: %v"
)
    72  
// EvictionHelper is a general tool collection for all memory eviction plugins.
//
// Fields:
//   - metaServer: its MetricsFetcher is the source of per-pod metrics when pods
//     are ranked for eviction.
//   - emitter: receives the fetch-error counter and is handed to the pod metric
//     lookup.
//   - reclaimedPodFilter: reports whether a pod counts as a reclaimed pod;
//     NewEvictionHelper sets it to conf.CheckReclaimedQoSForPod.
type EvictionHelper struct {
	metaServer         *metaserver.MetaServer
	emitter            metrics.MetricEmitter
	reclaimedPodFilter func(pod *v1.Pod) (bool, error)
}
    79  
    80  func NewEvictionHelper(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, conf *config.Configuration) *EvictionHelper {
    81  	return &EvictionHelper{
    82  		metaServer:         metaServer,
    83  		emitter:            emitter,
    84  		reclaimedPodFilter: conf.CheckReclaimedQoSForPod,
    85  	}
    86  }
    87  
    88  func (e *EvictionHelper) selectTopNPodsToEvictByMetrics(activePods []*v1.Pod, topN uint64, numaID,
    89  	action int, rankingMetrics []string, podToEvictMap map[string]*v1.Pod,
    90  ) {
    91  	filteredPods := e.filterPods(activePods, action)
    92  	if filteredPods != nil {
    93  		general.NewMultiSorter(e.getEvictionCmpFuncs(rankingMetrics, numaID)...).Sort(native.NewPodSourceImpList(filteredPods))
    94  		for i := 0; uint64(i) < general.MinUInt64(topN, uint64(len(filteredPods))); i++ {
    95  			podToEvictMap[string(filteredPods[i].UID)] = filteredPods[i]
    96  		}
    97  	}
    98  }
    99  
   100  func (e *EvictionHelper) filterPods(pods []*v1.Pod, action int) []*v1.Pod {
   101  	switch action {
   102  	case actionReclaimedEviction:
   103  		return native.FilterPods(pods, e.reclaimedPodFilter)
   104  	case actionEviction:
   105  		return pods
   106  	default:
   107  		return nil
   108  	}
   109  }
   110  
   111  // getEvictionCmpFuncs returns a comparison function list to judge the eviction order of different pods
   112  func (e *EvictionHelper) getEvictionCmpFuncs(rankingMetrics []string, numaID int) []general.CmpFunc {
   113  	cmpFuncs := make([]general.CmpFunc, 0, len(rankingMetrics))
   114  
   115  	for _, m := range rankingMetrics {
   116  		currentMetric := m
   117  		cmpFuncs = append(cmpFuncs, func(s1, s2 interface{}) int {
   118  			p1, p2 := s1.(*v1.Pod), s2.(*v1.Pod)
   119  			switch currentMetric {
   120  			case evictionconfig.FakeMetricQoSLevel:
   121  				isReclaimedPod1, err1 := e.reclaimedPodFilter(p1)
   122  				if err1 != nil {
   123  					general.Errorf(errMsgCheckReclaimedPodFailed, p1.Namespace, p1.Name, err1)
   124  				}
   125  
   126  				isReclaimedPod2, err2 := e.reclaimedPodFilter(p2)
   127  				if err2 != nil {
   128  					general.Errorf(errMsgCheckReclaimedPodFailed, p2.Namespace, p2.Name, err2)
   129  				}
   130  
   131  				if err1 != nil || err2 != nil {
   132  					// prioritize evicting the pod for which no error is returned
   133  					return general.CmpError(err1, err2)
   134  				}
   135  
   136  				// prioritize evicting the pod whose QoS level is reclaimed_cores
   137  				return general.CmpBool(isReclaimedPod1, isReclaimedPod2)
   138  			case evictionconfig.FakeMetricPriority:
   139  				// prioritize evicting the pod whose priority is lower
   140  				return general.ReverseCmpFunc(native.PodPriorityCmpFunc)(p1, p2)
   141  			default:
   142  				p1Metric, p1Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p1, currentMetric, numaID)
   143  				p2Metric, p2Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p2, currentMetric, numaID)
   144  				p1Found := p1Err == nil
   145  				p2Found := p2Err == nil
   146  				if !p1Found || !p2Found {
   147  					_ = e.emitter.StoreInt64(metricsNameFetchMetricError, 1, metrics.MetricTypeNameCount,
   148  						metrics.ConvertMapToTags(map[string]string{
   149  							metricsTagKeyNumaID: strconv.Itoa(numaID),
   150  						})...)
   151  					// prioritize evicting the pod for which no stats were found
   152  					return general.CmpBool(!p1Found, !p2Found)
   153  				}
   154  
   155  				// prioritize evicting the pod whose metric value is greater
   156  				return general.CmpFloat64(p1Metric, p2Metric)
   157  			}
   158  		})
   159  	}
   160  
   161  	return cmpFuncs
   162  }