github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/evictionmanager/plugin/memory/system_pressure.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package memory
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"strconv"
    24  	"sync"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	"k8s.io/client-go/tools/events"
    30  
    31  	pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1"
    32  	"github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin"
    33  	"github.com/kubewharf/katalyst-core/pkg/client"
    34  	"github.com/kubewharf/katalyst-core/pkg/config"
    35  	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    36  	"github.com/kubewharf/katalyst-core/pkg/consts"
    37  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    38  	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper"
    39  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    40  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    41  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    42  	"github.com/kubewharf/katalyst-core/pkg/util/process"
    43  )
    44  
    45  const (
    46  	EvictionPluginNameSystemMemoryPressure = "system-memory-pressure-eviction-plugin"
    47  	EvictionScopeSystemMemory              = "SystemMemory"
    48  	evictionConditionMemoryPressure        = "MemoryPressure"
    49  	systemMemoryPressureHealthCheck        = "system_memory_pressure_eviction_detect"
    50  	syncTolerationTurns                    = 3
    51  )
    52  
    53  func NewSystemPressureEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder,
    54  	metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration,
    55  ) plugin.EvictionPlugin {
    56  	p := &SystemPressureEvictionPlugin{
    57  		pluginName:                EvictionPluginNameSystemMemoryPressure,
    58  		emitter:                   emitter,
    59  		StopControl:               process.NewStopControl(time.Time{}),
    60  		metaServer:                metaServer,
    61  		evictionManagerSyncPeriod: conf.EvictionManagerSyncPeriod,
    62  		coolDownPeriod:            conf.SystemPressureCoolDownPeriod,
    63  		syncPeriod:                time.Duration(conf.SystemPressureSyncPeriod) * time.Second,
    64  		dynamicConfig:             conf.DynamicAgentConfiguration,
    65  		reclaimedPodFilter:        conf.CheckReclaimedQoSForPod,
    66  		evictionHelper:            NewEvictionHelper(emitter, metaServer, conf),
    67  	}
    68  	return p
    69  }
    70  
    71  // SystemPressureEvictionPlugin implements the EvictPlugin interface.
    72  // It triggers pod eviction based on the system pressure of memory.
    73  type SystemPressureEvictionPlugin struct {
    74  	*process.StopControl
    75  	sync.Mutex
    76  
    77  	emitter                   metrics.MetricEmitter
    78  	reclaimedPodFilter        func(pod *v1.Pod) (bool, error)
    79  	evictionManagerSyncPeriod time.Duration
    80  	pluginName                string
    81  	metaServer                *metaserver.MetaServer
    82  	evictionHelper            *EvictionHelper
    83  
    84  	syncPeriod     time.Duration
    85  	coolDownPeriod int
    86  	dynamicConfig  *dynamic.DynamicAgentConfiguration
    87  
    88  	systemAction                   int
    89  	isUnderSystemPressure          bool
    90  	kswapdStealPreviousCycle       float64
    91  	kswapdStealPreviousCycleTime   time.Time
    92  	kswapdStealRateExceedStartTime *time.Time
    93  	lastEvictionTime               time.Time
    94  }
    95  
    96  func (s *SystemPressureEvictionPlugin) Name() string {
    97  	if s == nil {
    98  		return ""
    99  	}
   100  
   101  	return s.pluginName
   102  }
   103  
   104  func (s *SystemPressureEvictionPlugin) Start() {
   105  	general.RegisterHeartbeatCheck(systemMemoryPressureHealthCheck, syncTolerationTurns*s.syncPeriod,
   106  		general.HealthzCheckStateNotReady, syncTolerationTurns*s.syncPeriod)
   107  	go wait.UntilWithContext(context.TODO(), s.detectSystemPressures, s.syncPeriod)
   108  }
   109  
   110  func (s *SystemPressureEvictionPlugin) ThresholdMet(_ context.Context) (*pluginapi.ThresholdMetResponse, error) {
   111  	resp := &pluginapi.ThresholdMetResponse{
   112  		MetType: pluginapi.ThresholdMetType_NOT_MET,
   113  	}
   114  
   115  	dynamicConfig := s.dynamicConfig.GetDynamicConfiguration()
   116  	if !dynamicConfig.EnableSystemLevelEviction {
   117  		return resp, nil
   118  	}
   119  
   120  	// TODO maybe we should set timeout for this lock operation in case it blocks the entire sync loop
   121  	s.Lock()
   122  	defer s.Unlock()
   123  
   124  	if s.isUnderSystemPressure {
   125  		resp = &pluginapi.ThresholdMetResponse{
   126  			MetType:       pluginapi.ThresholdMetType_HARD_MET,
   127  			EvictionScope: EvictionScopeSystemMemory,
   128  			Condition: &pluginapi.Condition{
   129  				ConditionType: pluginapi.ConditionType_NODE_CONDITION,
   130  				Effects:       []string{string(v1.TaintEffectNoSchedule)},
   131  				ConditionName: evictionConditionMemoryPressure,
   132  				MetCondition:  true,
   133  			},
   134  		}
   135  	}
   136  
   137  	general.Infof("ThresholdMet result, m.isUnderSystemPressure: %+v, m.systemAction: %+v", s.isUnderSystemPressure, s.systemAction)
   138  
   139  	return resp, nil
   140  }
   141  
   142  func (s *SystemPressureEvictionPlugin) detectSystemPressures(_ context.Context) {
   143  	s.Lock()
   144  	defer s.Unlock()
   145  	var err error
   146  	defer func() {
   147  		_ = general.UpdateHealthzStateByError(systemMemoryPressureHealthCheck, err)
   148  	}()
   149  
   150  	s.isUnderSystemPressure = false
   151  	s.systemAction = actionNoop
   152  
   153  	err = s.detectSystemWatermarkPressure()
   154  	err = s.detectSystemKswapdStealPressure()
   155  
   156  	switch s.systemAction {
   157  	case actionReclaimedEviction:
   158  		_ = s.emitter.StoreInt64(metricsNameThresholdMet, 1, metrics.MetricTypeNameCount,
   159  			metrics.ConvertMapToTags(map[string]string{
   160  				metricsTagKeyEvictionScope:  EvictionScopeSystemMemory,
   161  				metricsTagKeyDetectionLevel: metricsTagValueDetectionLevelSystem,
   162  				metricsTagKeyAction:         metricsTagValueActionReclaimedEviction,
   163  			})...)
   164  	case actionEviction:
   165  		_ = s.emitter.StoreInt64(metricsNameThresholdMet, 1, metrics.MetricTypeNameCount,
   166  			metrics.ConvertMapToTags(map[string]string{
   167  				metricsTagKeyEvictionScope:  EvictionScopeSystemMemory,
   168  				metricsTagKeyDetectionLevel: metricsTagValueDetectionLevelSystem,
   169  				metricsTagKeyAction:         metricsTagValueActionEviction,
   170  			})...)
   171  	}
   172  }
   173  
   174  func (s *SystemPressureEvictionPlugin) detectSystemWatermarkPressure() error {
   175  	free, total, scaleFactor, err := helper.GetWatermarkMetrics(s.metaServer.MetricsFetcher, s.emitter, nonExistNumaID)
   176  	if err != nil {
   177  		_ = s.emitter.StoreInt64(metricsNameFetchMetricError, 1, metrics.MetricTypeNameCount,
   178  			metrics.ConvertMapToTags(map[string]string{
   179  				metricsTagKeyNumaID: strconv.Itoa(nonExistNumaID),
   180  			})...)
   181  		general.Errorf("failed to getWatermarkMetrics for system, err: %v", err)
   182  		return err
   183  	}
   184  
   185  	thresholdMinimum := float64(s.dynamicConfig.GetDynamicConfiguration().SystemFreeMemoryThresholdMinimum)
   186  	threshold := math.Max(thresholdMinimum, total*scaleFactor/10000)
   187  
   188  	general.Infof("system watermark metrics, "+
   189  		"free: %+v, total: %+v, scaleFactor: %+v, configuration minimum: %+v, final threshold: %+v",
   190  		free, total, scaleFactor, thresholdMinimum, threshold)
   191  
   192  	if free < threshold {
   193  		s.isUnderSystemPressure = true
   194  		s.systemAction = actionReclaimedEviction
   195  	}
   196  	return nil
   197  }
   198  
   199  func (s *SystemPressureEvictionPlugin) detectSystemKswapdStealPressure() error {
   200  	kswapdSteal, err := helper.GetNodeMetricWithTime(s.metaServer.MetricsFetcher, s.emitter, consts.MetricMemKswapdstealSystem)
   201  	if err != nil {
   202  		s.kswapdStealPreviousCycle = kswapdStealPreviousCycleMissing
   203  		s.kswapdStealPreviousCycleTime = time.Now()
   204  		_ = s.emitter.StoreInt64(metricsNameFetchMetricError, 1, metrics.MetricTypeNameCount,
   205  			metrics.ConvertMapToTags(map[string]string{
   206  				metricsTagKeyNumaID: strconv.Itoa(nonExistNumaID),
   207  			})...)
   208  		general.Errorf("failed to getSystemKswapdStealMetrics, err: %v", err)
   209  		return err
   210  	}
   211  
   212  	if kswapdSteal.Time.Equal(s.kswapdStealPreviousCycleTime) {
   213  		general.Warningf("getSystemKswapdStealMetrics get same result as last round,skip current round")
   214  		return nil
   215  	}
   216  
   217  	dynamicConfig := s.dynamicConfig.GetDynamicConfiguration()
   218  	general.Infof("system kswapd metrics, "+
   219  		"kswapdSteal: %+v, kswapdStealPreviousCycle: %+v, kswapdStealPreviousCycleTime: %+v, systemKswapdRateThreshold: %+v, evictionManagerSyncPeriod: %+v, "+
   220  		"kswapdStealRateExceedStartTime: %+v, SystemKswapdRateExceedDurationThreshold: %+v",
   221  		kswapdSteal, s.kswapdStealPreviousCycle, s.kswapdStealPreviousCycleTime, dynamicConfig.SystemKswapdRateThreshold,
   222  		s.evictionManagerSyncPeriod.Seconds(), s.kswapdStealRateExceedStartTime,
   223  		dynamicConfig.SystemKswapdRateExceedDurationThreshold)
   224  	if s.kswapdStealRateExceedStartTime != nil && !s.kswapdStealRateExceedStartTime.IsZero() {
   225  		duration := kswapdSteal.Time.Sub(*s.kswapdStealRateExceedStartTime)
   226  		_ = s.emitter.StoreFloat64(metricsNameSystemMetric, duration.Seconds(), metrics.MetricTypeNameRaw,
   227  			metrics.ConvertMapToTags(map[string]string{
   228  				metricsTagKeyMetricName: metricsTagValueSystemKswapdRateExceedDuration,
   229  			})...)
   230  	}
   231  	_ = s.emitter.StoreFloat64(metricsNameSystemMetric, kswapdSteal.Value-s.kswapdStealPreviousCycle, metrics.MetricTypeNameRaw,
   232  		metrics.ConvertMapToTags(map[string]string{
   233  			metricsTagKeyMetricName: metricsTagValueSystemKswapdDiff,
   234  		})...)
   235  
   236  	kswapdStealPreviousCycle := s.kswapdStealPreviousCycle
   237  	kswapdStealPreviousCycleTime := s.kswapdStealPreviousCycleTime
   238  	s.kswapdStealPreviousCycle = kswapdSteal.Value
   239  	s.kswapdStealPreviousCycleTime = *(kswapdSteal.Time)
   240  	if kswapdStealPreviousCycle == kswapdStealPreviousCycleMissing {
   241  		general.Warningf("kswapd steal of the previous cycle is missing")
   242  		return nil
   243  	}
   244  
   245  	if (kswapdSteal.Value-kswapdStealPreviousCycle)/(kswapdSteal.Time.Sub(kswapdStealPreviousCycleTime)).Seconds() >= float64(dynamicConfig.SystemKswapdRateThreshold) {
   246  		// the pressure continues,if there is no recorded start time,we record the previous cycle time as the pressure start time
   247  		if s.kswapdStealRateExceedStartTime == nil || s.kswapdStealRateExceedStartTime.IsZero() {
   248  			s.kswapdStealRateExceedStartTime = &kswapdStealPreviousCycleTime
   249  		}
   250  	} else {
   251  		// there is no pressure anymore, clear the start time
   252  		s.kswapdStealRateExceedStartTime = nil
   253  	}
   254  
   255  	if s.kswapdStealRateExceedStartTime != nil && !s.kswapdStealRateExceedStartTime.IsZero() {
   256  		pressureDuration := kswapdSteal.Time.Sub(*(s.kswapdStealRateExceedStartTime)).Seconds()
   257  		if int(pressureDuration) >= dynamicConfig.SystemKswapdRateExceedDurationThreshold {
   258  			s.isUnderSystemPressure = true
   259  			s.systemAction = actionEviction
   260  		}
   261  	}
   262  	return nil
   263  }
   264  
   265  func (s *SystemPressureEvictionPlugin) GetTopEvictionPods(_ context.Context, request *pluginapi.GetTopEvictionPodsRequest) (*pluginapi.GetTopEvictionPodsResponse, error) {
   266  	if request == nil {
   267  		return nil, fmt.Errorf("GetTopEvictionPods got nil request")
   268  	}
   269  
   270  	if len(request.ActivePods) == 0 {
   271  		general.Warningf("GetTopEvictionPods got empty active pods list")
   272  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   273  	}
   274  
   275  	now := time.Now()
   276  	if !s.lastEvictionTime.IsZero() && now.Sub(s.lastEvictionTime) < time.Duration(s.coolDownPeriod)*time.Second {
   277  		general.Infof("in eviction cool-down time, skip eviction. now: %s, lastEvictionTime: %s",
   278  			now.String(), s.lastEvictionTime.String())
   279  		return &pluginapi.GetTopEvictionPodsResponse{}, nil
   280  	}
   281  	s.lastEvictionTime = now
   282  
   283  	dynamicConfig := s.dynamicConfig.GetDynamicConfiguration()
   284  	targetPods := make([]*v1.Pod, 0, len(request.ActivePods))
   285  	podToEvictMap := make(map[string]*v1.Pod)
   286  
   287  	general.Infof("GetTopEvictionPods condition, m.isUnderSystemPressure: %+v, "+
   288  		"m.systemAction: %+v", s.isUnderSystemPressure, s.systemAction)
   289  
   290  	if dynamicConfig.EnableSystemLevelEviction && s.isUnderSystemPressure {
   291  		s.evictionHelper.selectTopNPodsToEvictByMetrics(request.ActivePods, request.TopN, nonExistNumaID, s.systemAction,
   292  			dynamicConfig.SystemEvictionRankingMetrics, podToEvictMap)
   293  	}
   294  
   295  	for uid := range podToEvictMap {
   296  		targetPods = append(targetPods, podToEvictMap[uid])
   297  	}
   298  
   299  	_ = s.emitter.StoreInt64(metricsNameNumberOfTargetPods, int64(len(targetPods)), metrics.MetricTypeNameRaw)
   300  	general.Infof("GetTopEvictionPods result, targetPods: %+v", native.GetNamespacedNameListFromSlice(targetPods))
   301  
   302  	resp := &pluginapi.GetTopEvictionPodsResponse{
   303  		TargetPods: targetPods,
   304  	}
   305  	if gracePeriod := dynamicConfig.MemoryPressureEvictionConfiguration.GracePeriod; gracePeriod > 0 {
   306  		resp.DeletionOptions = &pluginapi.DeletionOptions{
   307  			GracePeriodSeconds: gracePeriod,
   308  		}
   309  	}
   310  
   311  	return resp, nil
   312  }
   313  
   314  func (s *SystemPressureEvictionPlugin) GetEvictPods(_ context.Context, request *pluginapi.GetEvictPodsRequest) (*pluginapi.GetEvictPodsResponse, error) {
   315  	if request == nil {
   316  		return nil, fmt.Errorf("GetEvictPods got nil request")
   317  	}
   318  
   319  	return &pluginapi.GetEvictPodsResponse{}, nil
   320  }