github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/agent-healthz/handler/handler_generic.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package handler 18 19 import ( 20 "context" 21 "fmt" 22 23 corev1 "k8s.io/api/core/v1" 24 "k8s.io/apimachinery/pkg/labels" 25 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 26 corelisters "k8s.io/client-go/listers/core/v1" 27 "k8s.io/client-go/tools/cache" 28 "k8s.io/klog/v2" 29 30 apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" 31 listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" 32 "github.com/kubewharf/katalyst-core/pkg/config/controller" 33 "github.com/kubewharf/katalyst-core/pkg/config/generic" 34 "github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/helper" 35 "github.com/kubewharf/katalyst-core/pkg/metrics" 36 "github.com/kubewharf/katalyst-core/pkg/util" 37 "github.com/kubewharf/katalyst-core/pkg/util/native" 38 ) 39 40 const AgentHandlerGeneric = "generic" 41 42 func init() { 43 RegisterAgentHandlerFunc(AgentHandlerGeneric, NewGenericAgentHandler) 44 } 45 46 // GenericAgentHandler implements AgentHandler with generic 47 // actions: i.e. taint cnr and trigger eviction for reclaimed_cores 48 type GenericAgentHandler struct { 49 ctx context.Context 50 agent string 51 emitter metrics.MetricEmitter 52 53 nodeSelector labels.Selector 54 qosConf *generic.QoSConfiguration 55 56 podIndexer cache.Indexer 57 nodeLister corelisters.NodeLister 58 cnrLister listers.CustomNodeResourceLister 59 60 checker *helper.HealthzHelper 61 } 62 63 func NewGenericAgentHandler(ctx context.Context, agent string, emitter metrics.MetricEmitter, 64 genericConf *generic.GenericConfiguration, _ *controller.LifeCycleConfig, nodeSelector labels.Selector, 65 podIndexer cache.Indexer, nodeLister corelisters.NodeLister, cnrLister listers.CustomNodeResourceLister, 66 checker *helper.HealthzHelper, 67 ) AgentHandler { 68 return &GenericAgentHandler{ 69 ctx: ctx, 70 agent: agent, 71 emitter: emitter, 72 73 nodeSelector: nodeSelector, 74 qosConf: genericConf.QoSConfiguration, 75 76 podIndexer: podIndexer, 77 nodeLister: nodeLister, 78 cnrLister: cnrLister, 79 80 checker: checker, 81 } 82 } 83 84 func (g *GenericAgentHandler) GetEvictionInfo(nodeName string) (*helper.EvictItem, bool) { 85 node, err := g.nodeLister.Get(nodeName) 86 if err != nil { 87 klog.Errorf("get cnr %v failed: %v", node, err) 88 return nil, false 89 } 90 91 if g.checker.CheckAgentReady(nodeName, g.agent) { 92 // not to trigger eviction if agent is still ready 93 return nil, false 94 } 95 96 pods := g.getNodeReclaimedPods(node) 97 if len(pods) == 0 { 98 // only need to evict reclaimed pods 99 return nil, false 100 } 101 102 return &helper.EvictItem{ 103 PodKeys: map[string][]string{ 104 nodeName: pods, 105 }, 106 }, true 107 } 108 109 func (g *GenericAgentHandler) GetCNRTaintInfo(nodeName string) (*helper.CNRTaintItem, bool) { 110 cnr, err := g.cnrLister.Get(nodeName) 111 if err != nil { 112 klog.Errorf("get cnr %v failed: %v", nodeName, err) 113 return nil, false 114 } 115 116 if g.checker.CheckAgentReady(nodeName, g.agent) { 117 // not to trigger eviction if agent is still ready 118 return nil, false 119 } else if util.CNRTaintExists(cnr.Spec.Taints, helper.TaintNoScheduler) { 120 // if taint already exists, not to trigger taints 121 return nil, false 122 } 123 124 return &helper.CNRTaintItem{ 125 Taints: map[string]*apis.Taint{ 126 helper.TaintNameNoScheduler: helper.TaintNoScheduler, 127 }, 128 }, true 129 } 130 131 // getNodeReclaimedPods returns reclaimed pods contained in the given node, 132 // only those nodes with reclaimed pods should be triggered with eviction/taint logic for generic agents 133 func (g *GenericAgentHandler) getNodeReclaimedPods(node *corev1.Node) (names []string) { 134 pods, err := native.GetPodsAssignedToNode(node.Name, g.podIndexer) 135 if err != nil { 136 utilruntime.HandleError(fmt.Errorf("unable to list pods from node %q: %v", node.Name, err)) 137 return 138 } 139 140 for _, pod := range pods { 141 if ok, err := g.qosConf.CheckReclaimedQoSForPod(pod); err == nil && ok { 142 names = append(names, pod.Name) 143 } 144 } 145 return 146 }