github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/eviction_resp_collector.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package evictionmanager
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  
    24  	//nolint
    25  	"github.com/golang/protobuf/proto"
    26  	v1 "k8s.io/api/core/v1"
    27  
    28  	pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
    29  	"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/rule"
    30  	pkgconfig "github.com/kubewharf/katalyst-core/pkg/config"
    31  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    32  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    33  )
    34  
    35  const effectTagValueSeparator = "_"
    36  
    37  // evictionRespCollector is used to collect eviction result from plugins, it also handles some logic such as dry run.
    38  type evictionRespCollector struct {
    39  	conf *pkgconfig.Configuration
    40  
    41  	currentMetThresholds map[string]*pluginapi.ThresholdMetResponse
    42  	currentConditions    map[string]*pluginapi.Condition
    43  
    44  	// softEvictPods are candidates (among which only one will be chosen);
    45  	// forceEvictPods are pods that should be killed immediately (but can be withdrawn)
    46  	softEvictPods  map[string]*rule.RuledEvictPod
    47  	forceEvictPods map[string]*rule.RuledEvictPod
    48  
    49  	// emitter is used to emit metrics.
    50  	emitter metrics.MetricEmitter
    51  }
    52  
    53  func newEvictionRespCollector(dryRun []string, conf *pkgconfig.Configuration, emitter metrics.MetricEmitter) *evictionRespCollector {
    54  	collector := &evictionRespCollector{
    55  		conf:                 conf,
    56  		currentMetThresholds: make(map[string]*pluginapi.ThresholdMetResponse),
    57  		currentConditions:    make(map[string]*pluginapi.Condition),
    58  
    59  		softEvictPods:  make(map[string]*rule.RuledEvictPod),
    60  		forceEvictPods: make(map[string]*rule.RuledEvictPod),
    61  
    62  		emitter: emitter,
    63  	}
    64  	general.Infof("dry run plugins is %v", dryRun)
    65  	return collector
    66  }
    67  
    68  func (e *evictionRespCollector) isDryRun(dryRunPlugins []string, pluginName string) bool {
    69  	if len(dryRunPlugins) == 0 {
    70  		return false
    71  	}
    72  
    73  	return general.IsNameEnabled(pluginName, nil, dryRunPlugins)
    74  }
    75  
    76  func (e *evictionRespCollector) getLogPrefix(dryRun bool) string {
    77  	if dryRun {
    78  		return "[DryRun]"
    79  	}
    80  
    81  	return ""
    82  }
    83  
    84  func (e *evictionRespCollector) collectEvictPods(dryRunPlugins []string, pluginName string, resp *pluginapi.GetEvictPodsResponse) {
    85  	dryRun := e.isDryRun(dryRunPlugins, pluginName)
    86  
    87  	evictPods := make([]*pluginapi.EvictPod, 0, len(resp.EvictPods))
    88  	for i, evictPod := range resp.EvictPods {
    89  		if evictPod == nil || evictPod.Pod == nil {
    90  			general.Errorf("%v skip nil evict pod of plugin: %s", e.getLogPrefix(dryRun), pluginName)
    91  			continue
    92  		}
    93  
    94  		general.Infof("%v plugin: %s requests to evict pod: %s/%s with reason: %s, forceEvict: %v",
    95  			e.getLogPrefix(dryRun), pluginName, evictPod.Pod.Namespace, evictPod.Pod.Name, evictPod.Reason, evictPod.ForceEvict)
    96  
    97  		if dryRun {
    98  			metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, evictPod.Pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels)
    99  		} else {
   100  			evictPods = append(evictPods, resp.EvictPods[i])
   101  		}
   102  	}
   103  
   104  	for _, evictPod := range evictPods {
   105  
   106  		// to avoid plugins forget to set EvictionPluginName property
   107  		evictPod.EvictionPluginName = pluginName
   108  
   109  		if evictPod.ForceEvict {
   110  			e.getForceEvictPods()[string(evictPod.Pod.UID)] = &rule.RuledEvictPod{
   111  				EvictPod: proto.Clone(evictPod).(*pluginapi.EvictPod),
   112  				Scope:    rule.EvictionScopeForce,
   113  			}
   114  		} else {
   115  			e.getSoftEvictPods()[string(evictPod.Pod.UID)] = &rule.RuledEvictPod{
   116  				EvictPod: proto.Clone(evictPod).(*pluginapi.EvictPod),
   117  				Scope:    rule.EvictionScopeSoft,
   118  			}
   119  		}
   120  	}
   121  
   122  	if resp.Condition != nil && resp.Condition.MetCondition {
   123  		general.Infof("%v plugin: %s requests set condition: %s of type: %s",
   124  			e.getLogPrefix(dryRun), pluginName, resp.Condition.ConditionName, resp.Condition.ConditionType.String())
   125  
   126  		if !dryRun {
   127  			e.getCurrentConditions()[resp.Condition.ConditionName] = proto.Clone(resp.Condition).(*pluginapi.Condition)
   128  		}
   129  	}
   130  }
   131  
   132  func (e *evictionRespCollector) collectMetThreshold(dryRunPlugins []string, pluginName string, resp *pluginapi.ThresholdMetResponse) {
   133  	dryRun := e.isDryRun(dryRunPlugins, pluginName)
   134  
   135  	if resp.MetType == pluginapi.ThresholdMetType_NOT_MET {
   136  		general.InfofV(6, "%v plugin: %s threshold isn't met", e.getLogPrefix(dryRun), pluginName)
   137  		return
   138  	}
   139  
   140  	// save thresholds to currentMetThreshold even in dry run mode so that GetTopEvictionPods function will be called
   141  	e.getCurrentMetThresholds()[pluginName] = proto.Clone(resp).(*pluginapi.ThresholdMetResponse)
   142  
   143  	general.Infof("%v plugin: %s met threshold: %s", e.getLogPrefix(dryRun), pluginName, resp.String())
   144  	if resp.Condition != nil && resp.Condition.MetCondition {
   145  		general.Infof("%v plugin: %s requests to set condition: %s of type: %s",
   146  			e.getLogPrefix(dryRun), pluginName, resp.Condition.ConditionName, resp.Condition.ConditionType.String())
   147  		_ = e.emitter.StoreInt64(MetricsNameRequestConditionCNT, 1, metrics.MetricTypeNameRaw,
   148  			metrics.MetricTag{Key: "name", Val: pluginName},
   149  			metrics.MetricTag{Key: "condition_name", Val: resp.Condition.ConditionName},
   150  			metrics.MetricTag{Key: "condition_type", Val: fmt.Sprint(resp.Condition.ConditionType)},
   151  			metrics.MetricTag{Key: "effects", Val: strings.Join(resp.Condition.Effects, effectTagValueSeparator)},
   152  			metrics.MetricTag{Key: "dryrun", Val: strconv.FormatBool(dryRun)},
   153  		)
   154  
   155  		if !dryRun {
   156  			e.getCurrentConditions()[resp.Condition.ConditionName] = proto.Clone(resp.Condition).(*pluginapi.Condition)
   157  		}
   158  	}
   159  }
   160  
   161  func (e *evictionRespCollector) collectTopEvictionPods(dryRunPlugins []string, pluginName string,
   162  	threshold *pluginapi.ThresholdMetResponse, resp *pluginapi.GetTopEvictionPodsResponse,
   163  ) {
   164  	dryRun := e.isDryRun(dryRunPlugins, pluginName)
   165  
   166  	targetPods := make([]*v1.Pod, 0, len(resp.TargetPods))
   167  	for i, pod := range resp.TargetPods {
   168  		if pod == nil {
   169  			continue
   170  		}
   171  
   172  		general.Infof("%v plugin %v request to evict topN pod %v/%v, reason: met threshold in scope [%v]",
   173  			e.getLogPrefix(dryRun), pluginName, pod.Namespace, pod.Name, threshold.EvictionScope)
   174  		if dryRun {
   175  			metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels)
   176  		} else {
   177  			targetPods = append(targetPods, resp.TargetPods[i])
   178  		}
   179  	}
   180  
   181  	for _, pod := range targetPods {
   182  		deletionOptions := resp.DeletionOptions
   183  		reason := fmt.Sprintf("plugin %s met threshold in scope %s, target %v, observed %v",
   184  			pluginName, threshold.EvictionScope, threshold.ThresholdValue, threshold.ObservedValue)
   185  
   186  		forceEvictPod := e.getForceEvictPods()[string(pod.UID)]
   187  		if forceEvictPod != nil {
   188  			if deletionOptions != nil && forceEvictPod.EvictPod.DeletionOptions != nil {
   189  				deletionOptions.GracePeriodSeconds = general.MaxInt64(deletionOptions.GracePeriodSeconds,
   190  					forceEvictPod.EvictPod.DeletionOptions.GracePeriodSeconds)
   191  			} else if forceEvictPod.EvictPod.DeletionOptions != nil {
   192  				deletionOptions.GracePeriodSeconds = forceEvictPod.EvictPod.DeletionOptions.GracePeriodSeconds
   193  			}
   194  			reason = fmt.Sprintf("%s; %s", reason, forceEvictPod.EvictPod.Reason)
   195  		}
   196  
   197  		e.getForceEvictPods()[string(pod.UID)] = &rule.RuledEvictPod{
   198  			EvictPod: &pluginapi.EvictPod{
   199  				Pod:                pod.DeepCopy(),
   200  				Reason:             reason,
   201  				DeletionOptions:    deletionOptions,
   202  				ForceEvict:         true,
   203  				EvictionPluginName: pluginName, // only count this pod to one plugin
   204  			},
   205  			Scope: threshold.EvictionScope,
   206  		}
   207  	}
   208  }
   209  
   210  func (e *evictionRespCollector) getCurrentConditions() map[string]*pluginapi.Condition {
   211  	return e.currentConditions
   212  }
   213  
   214  func (e *evictionRespCollector) getCurrentMetThresholds() map[string]*pluginapi.ThresholdMetResponse {
   215  	return e.currentMetThresholds
   216  }
   217  
   218  func (e *evictionRespCollector) getSoftEvictPods() map[string]*rule.RuledEvictPod {
   219  	return e.softEvictPods
   220  }
   221  
   222  func (e *evictionRespCollector) getForceEvictPods() map[string]*rule.RuledEvictPod {
   223  	return e.forceEvictPods
   224  }