github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/agent-healthz/healthz_controller.go

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

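// Package agent_healthz implements the agent-healthz controller: it watches
// the health of katalyst agents running on each node and, when an agent is
// judged unhealthy, taints the node's CNR and/or evicts pods from the node,
// subject to cluster-level disruption protection.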
package agent_healthz

import (
	"context"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/flowcontrol"
	"k8s.io/client-go/util/retry"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"

	apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
	informers "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions/node/v1alpha1"
	listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1"
	"github.com/kubewharf/katalyst-core/pkg/client"
	"github.com/kubewharf/katalyst-core/pkg/client/control"
	"github.com/kubewharf/katalyst-core/pkg/config/controller"
	"github.com/kubewharf/katalyst-core/pkg/config/generic"
	"github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/handler"
	"github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/helper"
	"github.com/kubewharf/katalyst-core/pkg/metrics"
	"github.com/kubewharf/katalyst-core/pkg/util/native"
)

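// AgentHealthzControllerName is the name by which this controller is
// registered and logged.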
const AgentHealthzControllerName = "agent-healthz"

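// metricsNameHealthState is the metric emitted for the computed cluster
// health state.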
const metricsNameHealthState = "health_state"

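// Cluster health states: tainting and eviction are suspended when the cluster
// is fully or partially disrupted, as a protection against acting on a
// cluster-wide failure.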
const (
	stateNormal            = "Normal"
	stateFullDisruption    = "FullDisruption"
	statePartialDisruption = "PartialDisruption"
)

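// HealthzController judges agent health on every selected node and drives the
// corresponding taint and eviction actions through rate-limited queues.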
type HealthzController struct {
	ctx     context.Context
	emitter metrics.MetricEmitter

	nodeLister       corelisters.NodeLister
	cnrLister        listers.CustomNodeResourceLister
	nodeListerSynced cache.InformerSynced
	podListerSynced  cache.InformerSynced
	cnrListerSynced  cache.InformerSynced

	taintThreshold     float32
	taintLimiterQPS    float32
	evictThreshold     float32
	evictionLimiterQPS float32
	nodeSelector       labels.Selector

	taintQueue *scheduler.RateLimitedTimedQueue
	evictQueue *scheduler.RateLimitedTimedQueue

	taintHelper   *helper.CNRTaintHelper
	evictHelper   *helper.EvictHelper
	healthzHelper *helper.HealthzHelper
	handlers      map[string]handler.AgentHandler
}

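// NewHealthzController constructs a HealthzController from the given
// configurations, informers, and clients. When dry-run is enabled (globally
// or for this controller), taint and eviction actions are backed by no-op
// dummies.
//
// A minimal wiring sketch (the surrounding variable names are illustrative,
// not part of this package):
//
//	hc, err := NewHealthzController(ctx, genericConf, nil, lifecycleConf,
//		clientSet, nodeInformer, podInformer, cnrInformer, emitter)
//	if err != nil {
//		return err
//	}
//	go hc.Run() // blocks until ctx is cancelled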
func NewHealthzController(ctx context.Context,
	genericConf *generic.GenericConfiguration,
	_ *controller.GenericControllerConfiguration,
	conf *controller.LifeCycleConfig,
	client *client.GenericClientSet,
	nodeInformer coreinformers.NodeInformer,
	podInformer coreinformers.PodInformer,
	cnrInformer informers.CustomNodeResourceInformer,
	metricsEmitter metrics.MetricEmitter,
) (*HealthzController, error) {
	ec := &HealthzController{
		ctx:     ctx,
		emitter: metricsEmitter,

		taintThreshold:     conf.DisruptionTaintThreshold,
		taintLimiterQPS:    conf.TaintQPS,
		evictThreshold:     conf.DisruptionEvictThreshold,
		evictionLimiterQPS: conf.EvictQPS,
		nodeSelector:       conf.NodeSelector,

		taintQueue: scheduler.NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(conf.TaintQPS, scheduler.EvictionRateLimiterBurst)),
		evictQueue: scheduler.NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(conf.EvictQPS, scheduler.EvictionRateLimiterBurst)),

		handlers: make(map[string]handler.AgentHandler),
	}

	// In dry-run mode, keep the no-op implementations so that no taint or
	// eviction is actually performed.
	var (
		cnrControl control.CNRControl = control.DummyCNRControl{}
		podControl control.PodEjector = control.DummyPodEjector{}
	)
	if !genericConf.DryRun && !conf.DryRun {
		cnrControl = control.NewCNRControlImpl(client.InternalClient)
		podControl = control.NewRealPodEjector(client.KubeClient)
	}

	ec.nodeListerSynced = nodeInformer.Informer().HasSynced
	ec.nodeLister = nodeInformer.Lister()

	ec.cnrLister = cnrInformer.Lister()
	ec.cnrListerSynced = cnrInformer.Informer().HasSynced

	ec.podListerSynced = podInformer.Informer().HasSynced
	podIndexer := podInformer.Informer().GetIndexer()

	if err := native.AddNodeNameIndexerForPod(podInformer); err != nil {
		return nil, err
	}

	if metricsEmitter == nil {
		ec.emitter = metrics.DummyMetrics{}
	} else {
		ec.emitter = metricsEmitter.WithTags("agent-healthz")
	}

	ec.healthzHelper = helper.NewHealthzHelper(ctx, conf, ec.emitter, ec.nodeSelector, conf.AgentSelector, podIndexer, ec.nodeLister, ec.cnrLister)
	ec.taintHelper = helper.NewTaintHelper(ctx, ec.emitter, cnrControl, ec.nodeLister, ec.cnrLister, ec.taintQueue, ec.healthzHelper)
	ec.evictHelper = helper.NewEvictHelper(ctx, ec.emitter, podControl, ec.nodeLister, ec.cnrLister, ec.evictQueue, ec.healthzHelper)

	// Each agent gets a handler: a specifically configured and registered
	// handler takes precedence, otherwise the generic one is used.
	registeredHandlerFuncs := handler.GetRegisterAgentHandlerFuncs()
	for agent := range conf.AgentSelector {
		initFunc := handler.NewGenericAgentHandler

		if handlerName, handlerExist := conf.AgentHandlers[agent]; handlerExist {
			if handlerFunc, funcExist := registeredHandlerFuncs[handlerName]; funcExist {
				initFunc = handlerFunc
			}
		}

		ec.handlers[agent] = initFunc(ctx, agent, ec.emitter, genericConf, conf,
			ec.nodeSelector, podIndexer, ec.nodeLister, ec.cnrLister, ec.healthzHelper)
	}

	native.SetPodTransformer(podTransformerFunc)
	return ec, nil
}

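// Run starts the controller and blocks until the context is cancelled: it
// waits for the informer caches to sync, then launches the periodic health
// check alongside the healthz, taint, and evict helpers.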
func (ec *HealthzController) Run() {
	defer utilruntime.HandleCrash()
	defer klog.Infof("shutting down %s controller", AgentHealthzControllerName)

	if !cache.WaitForCacheSync(ec.ctx.Done(), ec.nodeListerSynced, ec.cnrListerSynced, ec.podListerSynced) {
		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", AgentHealthzControllerName))
		return
	}
	klog.Infof("caches are synced for %s controller", AgentHealthzControllerName)

	go wait.Until(ec.syncAgentHealth, time.Second*30, ec.ctx.Done())
	ec.healthzHelper.Run()
	ec.taintHelper.Run()
	ec.evictHelper.Run()
	<-ec.ctx.Done()
}

// syncAgentHealth is the main health-checking logic: it collects taint and
// eviction candidates from every registered agent handler, applies the
// cluster-level disruption protection, and pushes the affected nodes into the
// rate-limited taint and evict queues.
func (ec *HealthzController) syncAgentHealth() {
	nodes, err := ec.nodeLister.List(ec.nodeSelector)
	if err != nil {
		klog.Errorf("failed to list nodes with selector %s: %v", ec.nodeSelector.String(), err)
		return
	}

	// Merge the taint and eviction candidates reported by every handler,
	// keyed by node name.
	taints := make(map[string]*helper.CNRTaintItem)
	evicts := make(map[string]*helper.EvictItem)
	for _, node := range nodes {
		for _, h := range ec.handlers {
			if item, ok := h.GetCNRTaintInfo(node.Name); ok && item != nil && item.Taints != nil {
				if _, exist := taints[node.Name]; !exist {
					taints[node.Name] = &helper.CNRTaintItem{
						Taints: make(map[string]*apis.Taint),
					}
				}

				for t, taint := range item.Taints {
					taints[node.Name].Taints[t] = taint
				}
			}

			if item, ok := h.GetEvictionInfo(node.Name); ok && item != nil && len(item.PodKeys) > 0 {
				if _, exist := evicts[node.Name]; !exist {
					evicts[node.Name] = &helper.EvictItem{
						PodKeys: make(map[string][]string),
					}
				}

				for agent, pods := range item.PodKeys {
					evicts[node.Name].PodKeys[agent] = pods
				}
			}
		}
	}

	klog.Infof("need to taint %d nodes and evict pods from %d nodes in total", len(taints), len(evicts))

	taintState := ec.computeClusterState(len(nodes), len(taints), ec.taintThreshold)
	ec.handleTaintDisruption(taintState)
	for _, node := range nodes {
		if item, ok := taints[node.Name]; ok {
			ec.taintQueue.Add(node.Name, item)
		} else {
			// The node is healthy again: drop it from the taint queue and
			// try to remove any taint previously added by this controller.
			ec.taintQueue.Remove(node.Name)
			if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
				return ec.taintHelper.TryUNTaintCNR(node.Name)
			}); err != nil {
				klog.Errorf("failed to untaint CNR for node %v: %v", node.Name, err)
			}
		}
	}

	evictState := ec.computeClusterState(len(nodes), len(evicts), ec.evictThreshold)
	ec.handleEvictDisruption(evictState)
	for _, node := range nodes {
		if _, ok := evicts[node.Name]; ok {
			ec.evictQueue.Add(node.Name, evicts[node.Name])
		} else {
			ec.evictQueue.Remove(node.Name)
		}
	}
}

// computeClusterState returns the overall health state of the cluster:
//   - stateFullDisruption if every node is unhealthy
//   - statePartialDisruption if more than two nodes are unhealthy and the
//     unhealthy fraction exceeds the given threshold (e.g. with threshold 0.3
//     and 10 nodes, 4 unhealthy nodes trigger it, since 4 > 2 and 0.4 > 0.3)
//   - stateNormal otherwise
func (ec *HealthzController) computeClusterState(totalNodes, unhealthyNodes int, threshold float32) string {
	switch {
	case totalNodes > 0 && unhealthyNodes >= totalNodes:
		return stateFullDisruption
	case unhealthyNodes > 2 && float32(unhealthyNodes)/float32(totalNodes) > threshold:
		return statePartialDisruption
	default:
		return stateNormal
	}
}

// handleTaintDisruption is a protection mechanism: if a large part of the
// cluster has fallen into an unhealthy state, something has probably gone
// wrong globally, so we hold off on tainting by swapping the limiter to zero.
func (ec *HealthzController) handleTaintDisruption(healthState string) {
	if healthState == stateFullDisruption || healthState == statePartialDisruption {
		ec.taintQueue.SwapLimiter(0)
	} else {
		ec.taintQueue.SwapLimiter(ec.taintLimiterQPS)
	}

	_ = ec.emitter.StoreInt64(metricsNameHealthState, 1, metrics.MetricTypeNameRaw,
		[]metrics.MetricTag{
			{Key: "action", Val: "taint"},
			{Key: "status", Val: healthState},
			{Key: "threshold", Val: fmt.Sprintf("%v", ec.taintThreshold)},
		}...)
	klog.Infof("controller detected taint state for nodes: %v", healthState)
}

// handleEvictDisruption is a protection mechanism: if a large part of the
// cluster has fallen into an unhealthy state, something has probably gone
// wrong globally, so we hold off on evicting by swapping the limiter to zero.
func (ec *HealthzController) handleEvictDisruption(healthState string) {
	if healthState == stateFullDisruption || healthState == statePartialDisruption {
		ec.evictQueue.SwapLimiter(0)
	} else {
		ec.evictQueue.SwapLimiter(ec.evictionLimiterQPS)
	}

	_ = ec.emitter.StoreInt64(metricsNameHealthState, 1, metrics.MetricTypeNameRaw,
		[]metrics.MetricTag{
			{Key: "action", Val: "evict"},
			{Key: "status", Val: healthState},
			{Key: "threshold", Val: fmt.Sprintf("%v", ec.evictThreshold)},
		}...)
	klog.Infof("controller detected evict state for nodes: %v", healthState)
}

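// podTransformerFunc strips cached pod objects down to the few fields this
// controller reads (node name, container names, container readiness), keeping
// the informer's memory footprint small.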
func podTransformerFunc(src, dest *corev1.Pod) {
	dest.Spec.NodeName = src.Spec.NodeName
	containersTransformerFunc(&src.Spec.Containers, &dest.Spec.Containers)
	containerStatusesTransformerFunc(&src.Status.ContainerStatuses, &dest.Status.ContainerStatuses)
}

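// containersTransformerFunc copies only the container names from src into
// dst, allocating dst if it is empty.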
func containersTransformerFunc(src, dst *[]corev1.Container) {
	if src == nil || len(*src) == 0 {
		return
	}

	if len(*dst) == 0 {
		*dst = make([]corev1.Container, len(*src))
	}

	for i, c := range *src {
		(*dst)[i].Name = c.Name
	}
}

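// containerStatusesTransformerFunc copies only the name and readiness of each
// container status from src into dst, allocating dst if it is empty.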
func containerStatusesTransformerFunc(src, dst *[]corev1.ContainerStatus) {
	if src == nil || len(*src) == 0 {
		return
	}

	if len(*dst) == 0 {
		*dst = make([]corev1.ContainerStatus, len(*src))
	}

	for i, c := range *src {
		(*dst)[i].Name = c.Name
		(*dst)[i].Ready = c.Ready
	}
}