github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/memory/helper.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package memory 18 19 import ( 20 "strconv" 21 22 v1 "k8s.io/api/core/v1" 23 24 "github.com/kubewharf/katalyst-core/pkg/config" 25 evictionconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction" 26 "github.com/kubewharf/katalyst-core/pkg/metaserver" 27 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" 28 "github.com/kubewharf/katalyst-core/pkg/metrics" 29 "github.com/kubewharf/katalyst-core/pkg/util/general" 30 "github.com/kubewharf/katalyst-core/pkg/util/native" 31 ) 32 33 // eviction scope related variables 34 // actionReclaimedEviction for reclaimed_cores, while actionEviction for all pods 35 const ( 36 actionNoop = iota 37 actionReclaimedEviction 38 actionEviction 39 ) 40 41 // control-related variables 42 const ( 43 kswapdStealPreviousCycleMissing = -1 44 nonExistNumaID = -1 45 ) 46 47 const ( 48 metricsNameFetchMetricError = "fetch_metric_error_count" 49 metricsNameNumberOfTargetPods = "number_of_target_pods_raw" 50 metricsNameThresholdMet = "threshold_met_count" 51 metricsNameNumaMetric = "numa_metric_raw" 52 metricsNameSystemMetric = "system_metric_raw" 53 54 metricsTagKeyEvictionScope = "eviction_scope" 55 metricsTagKeyDetectionLevel = "detection_level" 56 metricsTagKeyNumaID = "numa_id" 57 metricsTagKeyAction = "action" 58 metricsTagKeyMetricName = "metric_name" 59 60 metricsTagValueDetectionLevelNuma = "numa" 61 metricsTagValueDetectionLevelSystem = "system" 62 metricsTagValueActionReclaimedEviction = "reclaimed_eviction" 63 metricsTagValueActionEviction = "eviction" 64 metricsTagValueNumaFreeBelowWatermarkTimes = "numa_free_below_watermark_times" 65 metricsTagValueSystemKswapdDiff = "system_kswapd_diff" 66 metricsTagValueSystemKswapdRateExceedDuration = "system_kswapd_rate_exceed_duration" 67 ) 68 69 const ( 70 errMsgCheckReclaimedPodFailed = "failed to check reclaimed pod, pod: %s/%s, err: %v" 71 ) 72 73 // EvictionHelper is a general tool collection for all memory eviction plugin 74 type EvictionHelper struct { 75 metaServer *metaserver.MetaServer 76 emitter metrics.MetricEmitter 77 reclaimedPodFilter func(pod *v1.Pod) (bool, error) 78 } 79 80 func NewEvictionHelper(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, conf *config.Configuration) *EvictionHelper { 81 return &EvictionHelper{ 82 metaServer: metaServer, 83 emitter: emitter, 84 reclaimedPodFilter: conf.CheckReclaimedQoSForPod, 85 } 86 } 87 88 func (e *EvictionHelper) selectTopNPodsToEvictByMetrics(activePods []*v1.Pod, topN uint64, numaID, 89 action int, rankingMetrics []string, podToEvictMap map[string]*v1.Pod, 90 ) { 91 filteredPods := e.filterPods(activePods, action) 92 if filteredPods != nil { 93 general.NewMultiSorter(e.getEvictionCmpFuncs(rankingMetrics, numaID)...).Sort(native.NewPodSourceImpList(filteredPods)) 94 for i := 0; uint64(i) < general.MinUInt64(topN, uint64(len(filteredPods))); i++ { 95 podToEvictMap[string(filteredPods[i].UID)] = filteredPods[i] 96 } 97 } 98 } 99 100 func (e *EvictionHelper) filterPods(pods []*v1.Pod, action int) []*v1.Pod { 101 switch action { 102 case actionReclaimedEviction: 103 return native.FilterPods(pods, e.reclaimedPodFilter) 104 case actionEviction: 105 return pods 106 default: 107 return nil 108 } 109 } 110 111 // getEvictionCmpFuncs returns a comparison function list to judge the eviction order of different pods 112 func (e *EvictionHelper) getEvictionCmpFuncs(rankingMetrics []string, numaID int) []general.CmpFunc { 113 cmpFuncs := make([]general.CmpFunc, 0, len(rankingMetrics)) 114 115 for _, m := range rankingMetrics { 116 currentMetric := m 117 cmpFuncs = append(cmpFuncs, func(s1, s2 interface{}) int { 118 p1, p2 := s1.(*v1.Pod), s2.(*v1.Pod) 119 switch currentMetric { 120 case evictionconfig.FakeMetricQoSLevel: 121 isReclaimedPod1, err1 := e.reclaimedPodFilter(p1) 122 if err1 != nil { 123 general.Errorf(errMsgCheckReclaimedPodFailed, p1.Namespace, p1.Name, err1) 124 } 125 126 isReclaimedPod2, err2 := e.reclaimedPodFilter(p2) 127 if err2 != nil { 128 general.Errorf(errMsgCheckReclaimedPodFailed, p2.Namespace, p2.Name, err2) 129 } 130 131 if err1 != nil || err2 != nil { 132 // prioritize evicting the pod for which no error is returned 133 return general.CmpError(err1, err2) 134 } 135 136 // prioritize evicting the pod whose QoS level is reclaimed_cores 137 return general.CmpBool(isReclaimedPod1, isReclaimedPod2) 138 case evictionconfig.FakeMetricPriority: 139 // prioritize evicting the pod whose priority is lower 140 return general.ReverseCmpFunc(native.PodPriorityCmpFunc)(p1, p2) 141 default: 142 p1Metric, p1Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p1, currentMetric, numaID) 143 p2Metric, p2Err := helper.GetPodMetric(e.metaServer.MetricsFetcher, e.emitter, p2, currentMetric, numaID) 144 p1Found := p1Err == nil 145 p2Found := p2Err == nil 146 if !p1Found || !p2Found { 147 _ = e.emitter.StoreInt64(metricsNameFetchMetricError, 1, metrics.MetricTypeNameCount, 148 metrics.ConvertMapToTags(map[string]string{ 149 metricsTagKeyNumaID: strconv.Itoa(numaID), 150 })...) 151 // prioritize evicting the pod for which no stats were found 152 return general.CmpBool(!p1Found, !p2Found) 153 } 154 155 // prioritize evicting the pod whose metric value is greater 156 return general.CmpFloat64(p1Metric, p2Metric) 157 } 158 }) 159 } 160 161 return cmpFuncs 162 }