github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/eviction_resp_collector.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package evictionmanager 18 19 import ( 20 "fmt" 21 "strconv" 22 "strings" 23 24 //nolint 25 "github.com/golang/protobuf/proto" 26 v1 "k8s.io/api/core/v1" 27 28 pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" 29 "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/rule" 30 pkgconfig "github.com/kubewharf/katalyst-core/pkg/config" 31 "github.com/kubewharf/katalyst-core/pkg/metrics" 32 "github.com/kubewharf/katalyst-core/pkg/util/general" 33 ) 34 35 const effectTagValueSeparator = "_" 36 37 // evictionRespCollector is used to collect eviction result from plugins, it also handles some logic such as dry run. 38 type evictionRespCollector struct { 39 conf *pkgconfig.Configuration 40 41 currentMetThresholds map[string]*pluginapi.ThresholdMetResponse 42 currentConditions map[string]*pluginapi.Condition 43 44 // softEvictPods are candidates (among which only one will be chosen); 45 // forceEvictPods are pods that should be killed immediately (but can be withdrawn) 46 softEvictPods map[string]*rule.RuledEvictPod 47 forceEvictPods map[string]*rule.RuledEvictPod 48 49 // emitter is used to emit metrics. 50 emitter metrics.MetricEmitter 51 } 52 53 func newEvictionRespCollector(dryRun []string, conf *pkgconfig.Configuration, emitter metrics.MetricEmitter) *evictionRespCollector { 54 collector := &evictionRespCollector{ 55 conf: conf, 56 currentMetThresholds: make(map[string]*pluginapi.ThresholdMetResponse), 57 currentConditions: make(map[string]*pluginapi.Condition), 58 59 softEvictPods: make(map[string]*rule.RuledEvictPod), 60 forceEvictPods: make(map[string]*rule.RuledEvictPod), 61 62 emitter: emitter, 63 } 64 general.Infof("dry run plugins is %v", dryRun) 65 return collector 66 } 67 68 func (e *evictionRespCollector) isDryRun(dryRunPlugins []string, pluginName string) bool { 69 if len(dryRunPlugins) == 0 { 70 return false 71 } 72 73 return general.IsNameEnabled(pluginName, nil, dryRunPlugins) 74 } 75 76 func (e *evictionRespCollector) getLogPrefix(dryRun bool) string { 77 if dryRun { 78 return "[DryRun]" 79 } 80 81 return "" 82 } 83 84 func (e *evictionRespCollector) collectEvictPods(dryRunPlugins []string, pluginName string, resp *pluginapi.GetEvictPodsResponse) { 85 dryRun := e.isDryRun(dryRunPlugins, pluginName) 86 87 evictPods := make([]*pluginapi.EvictPod, 0, len(resp.EvictPods)) 88 for i, evictPod := range resp.EvictPods { 89 if evictPod == nil || evictPod.Pod == nil { 90 general.Errorf("%v skip nil evict pod of plugin: %s", e.getLogPrefix(dryRun), pluginName) 91 continue 92 } 93 94 general.Infof("%v plugin: %s requests to evict pod: %s/%s with reason: %s, forceEvict: %v", 95 e.getLogPrefix(dryRun), pluginName, evictPod.Pod.Namespace, evictPod.Pod.Name, evictPod.Reason, evictPod.ForceEvict) 96 97 if dryRun { 98 metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, evictPod.Pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels) 99 } else { 100 evictPods = append(evictPods, resp.EvictPods[i]) 101 } 102 } 103 104 for _, evictPod := range evictPods { 105 106 // to avoid plugins forget to set EvictionPluginName property 107 evictPod.EvictionPluginName = pluginName 108 109 if evictPod.ForceEvict { 110 e.getForceEvictPods()[string(evictPod.Pod.UID)] = &rule.RuledEvictPod{ 111 EvictPod: proto.Clone(evictPod).(*pluginapi.EvictPod), 112 Scope: rule.EvictionScopeForce, 113 } 114 } else { 115 e.getSoftEvictPods()[string(evictPod.Pod.UID)] = &rule.RuledEvictPod{ 116 EvictPod: proto.Clone(evictPod).(*pluginapi.EvictPod), 117 Scope: rule.EvictionScopeSoft, 118 } 119 } 120 } 121 122 if resp.Condition != nil && resp.Condition.MetCondition { 123 general.Infof("%v plugin: %s requests set condition: %s of type: %s", 124 e.getLogPrefix(dryRun), pluginName, resp.Condition.ConditionName, resp.Condition.ConditionType.String()) 125 126 if !dryRun { 127 e.getCurrentConditions()[resp.Condition.ConditionName] = proto.Clone(resp.Condition).(*pluginapi.Condition) 128 } 129 } 130 } 131 132 func (e *evictionRespCollector) collectMetThreshold(dryRunPlugins []string, pluginName string, resp *pluginapi.ThresholdMetResponse) { 133 dryRun := e.isDryRun(dryRunPlugins, pluginName) 134 135 if resp.MetType == pluginapi.ThresholdMetType_NOT_MET { 136 general.InfofV(6, "%v plugin: %s threshold isn't met", e.getLogPrefix(dryRun), pluginName) 137 return 138 } 139 140 // save thresholds to currentMetThreshold even in dry run mode so that GetTopEvictionPods function will be called 141 e.getCurrentMetThresholds()[pluginName] = proto.Clone(resp).(*pluginapi.ThresholdMetResponse) 142 143 general.Infof("%v plugin: %s met threshold: %s", e.getLogPrefix(dryRun), pluginName, resp.String()) 144 if resp.Condition != nil && resp.Condition.MetCondition { 145 general.Infof("%v plugin: %s requests to set condition: %s of type: %s", 146 e.getLogPrefix(dryRun), pluginName, resp.Condition.ConditionName, resp.Condition.ConditionType.String()) 147 _ = e.emitter.StoreInt64(MetricsNameRequestConditionCNT, 1, metrics.MetricTypeNameRaw, 148 metrics.MetricTag{Key: "name", Val: pluginName}, 149 metrics.MetricTag{Key: "condition_name", Val: resp.Condition.ConditionName}, 150 metrics.MetricTag{Key: "condition_type", Val: fmt.Sprint(resp.Condition.ConditionType)}, 151 metrics.MetricTag{Key: "effects", Val: strings.Join(resp.Condition.Effects, effectTagValueSeparator)}, 152 metrics.MetricTag{Key: "dryrun", Val: strconv.FormatBool(dryRun)}, 153 ) 154 155 if !dryRun { 156 e.getCurrentConditions()[resp.Condition.ConditionName] = proto.Clone(resp.Condition).(*pluginapi.Condition) 157 } 158 } 159 } 160 161 func (e *evictionRespCollector) collectTopEvictionPods(dryRunPlugins []string, pluginName string, 162 threshold *pluginapi.ThresholdMetResponse, resp *pluginapi.GetTopEvictionPodsResponse, 163 ) { 164 dryRun := e.isDryRun(dryRunPlugins, pluginName) 165 166 targetPods := make([]*v1.Pod, 0, len(resp.TargetPods)) 167 for i, pod := range resp.TargetPods { 168 if pod == nil { 169 continue 170 } 171 172 general.Infof("%v plugin %v request to evict topN pod %v/%v, reason: met threshold in scope [%v]", 173 e.getLogPrefix(dryRun), pluginName, pod.Namespace, pod.Name, threshold.EvictionScope) 174 if dryRun { 175 metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels) 176 } else { 177 targetPods = append(targetPods, resp.TargetPods[i]) 178 } 179 } 180 181 for _, pod := range targetPods { 182 deletionOptions := resp.DeletionOptions 183 reason := fmt.Sprintf("plugin %s met threshold in scope %s, target %v, observed %v", 184 pluginName, threshold.EvictionScope, threshold.ThresholdValue, threshold.ObservedValue) 185 186 forceEvictPod := e.getForceEvictPods()[string(pod.UID)] 187 if forceEvictPod != nil { 188 if deletionOptions != nil && forceEvictPod.EvictPod.DeletionOptions != nil { 189 deletionOptions.GracePeriodSeconds = general.MaxInt64(deletionOptions.GracePeriodSeconds, 190 forceEvictPod.EvictPod.DeletionOptions.GracePeriodSeconds) 191 } else if forceEvictPod.EvictPod.DeletionOptions != nil { 192 deletionOptions.GracePeriodSeconds = forceEvictPod.EvictPod.DeletionOptions.GracePeriodSeconds 193 } 194 reason = fmt.Sprintf("%s; %s", reason, forceEvictPod.EvictPod.Reason) 195 } 196 197 e.getForceEvictPods()[string(pod.UID)] = &rule.RuledEvictPod{ 198 EvictPod: &pluginapi.EvictPod{ 199 Pod: pod.DeepCopy(), 200 Reason: reason, 201 DeletionOptions: deletionOptions, 202 ForceEvict: true, 203 EvictionPluginName: pluginName, // only count this pod to one plugin 204 }, 205 Scope: threshold.EvictionScope, 206 } 207 } 208 } 209 210 func (e *evictionRespCollector) getCurrentConditions() map[string]*pluginapi.Condition { 211 return e.currentConditions 212 } 213 214 func (e *evictionRespCollector) getCurrentMetThresholds() map[string]*pluginapi.ThresholdMetResponse { 215 return e.currentMetThresholds 216 } 217 218 func (e *evictionRespCollector) getSoftEvictPods() map[string]*rule.RuledEvictPod { 219 return e.softEvictPods 220 } 221 222 func (e *evictionRespCollector) getForceEvictPods() map[string]*rule.RuledEvictPod { 223 return e.forceEvictPods 224 }