github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/agent-healthz/handler/handler_generic.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package handler
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	corev1 "k8s.io/api/core/v1"
    24  	"k8s.io/apimachinery/pkg/labels"
    25  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    26  	corelisters "k8s.io/client-go/listers/core/v1"
    27  	"k8s.io/client-go/tools/cache"
    28  	"k8s.io/klog/v2"
    29  
    30  	apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    31  	listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1"
    32  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    33  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    34  	"github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/helper"
    35  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    36  	"github.com/kubewharf/katalyst-core/pkg/util"
    37  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    38  )
    39  
// AgentHandlerGeneric is the name under which NewGenericAgentHandler is
// registered as an agent handler constructor.
const AgentHandlerGeneric = "generic"
    41  
// init registers NewGenericAgentHandler under the AgentHandlerGeneric name
// through RegisterAgentHandlerFunc when the package is initialized.
func init() {
	RegisterAgentHandlerFunc(AgentHandlerGeneric, NewGenericAgentHandler)
}
    45  
// GenericAgentHandler implements AgentHandler with generic
// actions: i.e. taint cnr and trigger eviction for reclaimed_cores
type GenericAgentHandler struct {
	// ctx is the context handed to the constructor; it is only stored here.
	ctx context.Context
	// agent is the agent name passed to the healthz checker when asking
	// whether that agent is ready on a node.
	agent string
	// emitter is the metrics emitter handed to the constructor.
	emitter metrics.MetricEmitter

	// nodeSelector is the label selector handed to the constructor.
	nodeSelector labels.Selector
	// qosConf decides whether a pod belongs to the reclaimed QoS level.
	qosConf *generic.QoSConfiguration

	// podIndexer is the pod cache used to find the pods assigned to a node.
	podIndexer cache.Indexer
	// nodeLister looks up Node objects by name.
	nodeLister corelisters.NodeLister
	// cnrLister looks up CustomNodeResource objects by name.
	cnrLister listers.CustomNodeResourceLister

	// checker reports whether a given agent is ready on a given node.
	checker *helper.HealthzHelper
}
    62  
    63  func NewGenericAgentHandler(ctx context.Context, agent string, emitter metrics.MetricEmitter,
    64  	genericConf *generic.GenericConfiguration, _ *controller.LifeCycleConfig, nodeSelector labels.Selector,
    65  	podIndexer cache.Indexer, nodeLister corelisters.NodeLister, cnrLister listers.CustomNodeResourceLister,
    66  	checker *helper.HealthzHelper,
    67  ) AgentHandler {
    68  	return &GenericAgentHandler{
    69  		ctx:     ctx,
    70  		agent:   agent,
    71  		emitter: emitter,
    72  
    73  		nodeSelector: nodeSelector,
    74  		qosConf:      genericConf.QoSConfiguration,
    75  
    76  		podIndexer: podIndexer,
    77  		nodeLister: nodeLister,
    78  		cnrLister:  cnrLister,
    79  
    80  		checker: checker,
    81  	}
    82  }
    83  
    84  func (g *GenericAgentHandler) GetEvictionInfo(nodeName string) (*helper.EvictItem, bool) {
    85  	node, err := g.nodeLister.Get(nodeName)
    86  	if err != nil {
    87  		klog.Errorf("get cnr %v failed: %v", node, err)
    88  		return nil, false
    89  	}
    90  
    91  	if g.checker.CheckAgentReady(nodeName, g.agent) {
    92  		// not to trigger eviction if agent is still ready
    93  		return nil, false
    94  	}
    95  
    96  	pods := g.getNodeReclaimedPods(node)
    97  	if len(pods) == 0 {
    98  		// only need to evict reclaimed pods
    99  		return nil, false
   100  	}
   101  
   102  	return &helper.EvictItem{
   103  		PodKeys: map[string][]string{
   104  			nodeName: pods,
   105  		},
   106  	}, true
   107  }
   108  
   109  func (g *GenericAgentHandler) GetCNRTaintInfo(nodeName string) (*helper.CNRTaintItem, bool) {
   110  	cnr, err := g.cnrLister.Get(nodeName)
   111  	if err != nil {
   112  		klog.Errorf("get cnr %v failed: %v", nodeName, err)
   113  		return nil, false
   114  	}
   115  
   116  	if g.checker.CheckAgentReady(nodeName, g.agent) {
   117  		// not to trigger eviction if agent is still ready
   118  		return nil, false
   119  	} else if util.CNRTaintExists(cnr.Spec.Taints, helper.TaintNoScheduler) {
   120  		// if taint already exists, not to trigger taints
   121  		return nil, false
   122  	}
   123  
   124  	return &helper.CNRTaintItem{
   125  		Taints: map[string]*apis.Taint{
   126  			helper.TaintNameNoScheduler: helper.TaintNoScheduler,
   127  		},
   128  	}, true
   129  }
   130  
   131  // getNodeReclaimedPods returns reclaimed pods contained in the given node,
   132  // only those nodes with reclaimed pods should be triggered with eviction/taint logic for generic agents
   133  func (g *GenericAgentHandler) getNodeReclaimedPods(node *corev1.Node) (names []string) {
   134  	pods, err := native.GetPodsAssignedToNode(node.Name, g.podIndexer)
   135  	if err != nil {
   136  		utilruntime.HandleError(fmt.Errorf("unable to list pods from node %q: %v", node.Name, err))
   137  		return
   138  	}
   139  
   140  	for _, pod := range pods {
   141  		if ok, err := g.qosConf.CheckReclaimedQoSForPod(pod); err == nil && ok {
   142  			names = append(names, pod.Name)
   143  		}
   144  	}
   145  	return
   146  }