github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/memory/rss_overuse.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package memory 18 19 import ( 20 "context" 21 "fmt" 22 "strconv" 23 "time" 24 25 v1 "k8s.io/api/core/v1" 26 "k8s.io/apimachinery/pkg/labels" 27 "k8s.io/apimachinery/pkg/util/sets" 28 "k8s.io/client-go/tools/events" 29 30 apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" 31 pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" 32 "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin" 33 "github.com/kubewharf/katalyst-core/pkg/client" 34 "github.com/kubewharf/katalyst-core/pkg/config" 35 "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" 36 "github.com/kubewharf/katalyst-core/pkg/config/agent/eviction" 37 "github.com/kubewharf/katalyst-core/pkg/config/generic" 38 "github.com/kubewharf/katalyst-core/pkg/consts" 39 "github.com/kubewharf/katalyst-core/pkg/metaserver" 40 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" 41 "github.com/kubewharf/katalyst-core/pkg/metrics" 42 "github.com/kubewharf/katalyst-core/pkg/util/general" 43 "github.com/kubewharf/katalyst-core/pkg/util/process" 44 "github.com/kubewharf/katalyst-core/pkg/util/qos" 45 ) 46 47 const ( 48 EvictionPluginNameRssOveruse = "rss-overuse-eviction-plugin" 49 50 RssOveruseEvictionReason = "hit rss overuse policy, threshold is %.2f, current pod rss is %.2f, pod memory request is %d" 51 ) 52 53 func NewRssOveruseEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder, 54 metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration, 55 ) plugin.EvictionPlugin { 56 return &RssOveruseEvictionPlugin{ 57 StopControl: process.NewStopControl(time.Time{}), 58 emitter: emitter, 59 reclaimedPodFilter: conf.CheckReclaimedQoSForPod, 60 pluginName: EvictionPluginNameRssOveruse, 61 metaServer: metaServer, 62 supportedQosLevels: sets.NewString(apiconsts.PodAnnotationQoSLevelReclaimedCores, apiconsts.PodAnnotationQoSLevelSharedCores), 63 64 dynamicConfig: conf.DynamicAgentConfiguration, 65 qosConf: conf.QoSConfiguration, 66 evictionConfig: conf.MemoryPressureEvictionConfiguration, 67 } 68 } 69 70 // RssOveruseEvictionPlugin implements the EvictPlugin interface. It triggers pod eviction based on the rss usage ratio. 71 // Once a pod use more rss than the specified threshold, this plugin will evict the pod.The threshold is calculated based 72 // on pod's memory request. Its main goal is to make sure sufficient memory for page cache in some scenarios in which 73 // service use page cache to improve performance. 74 type RssOveruseEvictionPlugin struct { 75 *process.StopControl 76 77 emitter metrics.MetricEmitter 78 reclaimedPodFilter func(pod *v1.Pod) (bool, error) 79 pluginName string 80 metaServer *metaserver.MetaServer 81 supportedQosLevels sets.String 82 83 dynamicConfig *dynamic.DynamicAgentConfiguration 84 qosConf *generic.QoSConfiguration 85 evictionConfig *eviction.MemoryPressureEvictionConfiguration 86 } 87 88 func (r *RssOveruseEvictionPlugin) Name() string { 89 if r == nil { 90 return "" 91 } 92 93 return r.pluginName 94 } 95 96 func (r *RssOveruseEvictionPlugin) ThresholdMet(_ context.Context) (*pluginapi.ThresholdMetResponse, error) { 97 return &pluginapi.ThresholdMetResponse{ 98 MetType: pluginapi.ThresholdMetType_NOT_MET, 99 }, nil 100 } 101 102 func (r *RssOveruseEvictionPlugin) GetTopEvictionPods(_ context.Context, _ *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) { 103 return &pluginapi.GetTopEvictionPodsResponse{}, nil 104 } 105 106 func (r *RssOveruseEvictionPlugin) GetEvictPods(_ context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) { 107 result := make([]*pluginapi.EvictPod, 0) 108 109 dynamicConfig := r.dynamicConfig.GetDynamicConfiguration() 110 if !dynamicConfig.EnableRSSOveruseEviction { 111 return &pluginapi.GetEvictPodsResponse{EvictPods: result}, nil 112 } 113 114 filterPods := make([]*v1.Pod, 0, len(request.ActivePods)) 115 selector := r.evictionConfig.RSSOveruseEvictionFilter.AsSelector() 116 117 for i := range request.ActivePods { 118 pod := request.ActivePods[i] 119 set := (labels.Set)(pod.Labels) 120 if selector.Matches(set) { 121 filterPods = append(filterPods, pod) 122 } 123 } 124 125 for i := range filterPods { 126 pod := filterPods[i] 127 128 qosLevel, err := r.qosConf.GetQoSLevelForPod(pod) 129 if err != nil { 130 general.Errorf("get qos level failed for pod %+v/%+v, skip check rss overuse, err: %v", pod.Namespace, pod.Name, err) 131 continue 132 } 133 134 if !r.supportedQosLevels.Has(qosLevel) { 135 continue 136 } 137 138 userSpecifiedThreshold, invalid := qos.GetRSSOverUseEvictThreshold(r.qosConf, pod) 139 // don't perform eviction for safety if user set an invalid threshold 140 if invalid { 141 general.Warningf("pod %+v/%+v set invalid overuse eviction threshold, skip check rss overuse", pod.Namespace, pod.Name) 142 continue 143 } 144 145 threshold := dynamicConfig.RSSOveruseRateThreshold 146 // user set threshold explicitly,use default value 147 if userSpecifiedThreshold != nil { 148 threshold = *userSpecifiedThreshold 149 } 150 151 var memRequest int64 = 0 152 requestNotSet := false 153 for _, container := range pod.Spec.Containers { 154 containerMemRequest := container.Resources.Requests.Memory() 155 if containerMemRequest.IsZero() { 156 requestNotSet = true 157 continue 158 } 159 memRequest += containerMemRequest.Value() 160 } 161 162 // if there is at least one container without memory limit, skip it 163 if requestNotSet { 164 continue 165 } 166 167 podRss, err := helper.GetPodMetric(r.metaServer.MetricsFetcher, r.emitter, pod, consts.MetricMemRssContainer, nonExistNumaID) 168 if err != nil { 169 _ = r.emitter.StoreInt64(metricsNameFetchMetricError, 1, metrics.MetricTypeNameCount, 170 metrics.ConvertMapToTags(map[string]string{ 171 metricsTagKeyNumaID: strconv.Itoa(nonExistNumaID), 172 })...) 173 continue 174 } 175 176 if podRss > threshold*float64(memRequest) { 177 result = append(result, &pluginapi.EvictPod{ 178 Pod: pod, 179 Reason: fmt.Sprintf(RssOveruseEvictionReason, threshold, podRss, memRequest), 180 ForceEvict: false, 181 }) 182 } 183 } 184 185 return &pluginapi.GetEvictPodsResponse{EvictPods: result}, nil 186 } 187 188 func (r *RssOveruseEvictionPlugin) Start() { 189 return 190 }