github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/agent-healthz/healthz_controller.go

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package agent_healthz

import (
    "context"
    "fmt"
    "time"

    corev1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/labels"
    utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    "k8s.io/apimachinery/pkg/util/wait"
    coreinformers "k8s.io/client-go/informers/core/v1"
    corelisters "k8s.io/client-go/listers/core/v1"
    "k8s.io/client-go/tools/cache"
    "k8s.io/client-go/util/flowcontrol"
    "k8s.io/client-go/util/retry"
    "k8s.io/klog/v2"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"

    apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    informers "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions/node/v1alpha1"
    listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1"
    "github.com/kubewharf/katalyst-core/pkg/client"
    "github.com/kubewharf/katalyst-core/pkg/client/control"
    "github.com/kubewharf/katalyst-core/pkg/config/controller"
    "github.com/kubewharf/katalyst-core/pkg/config/generic"
    "github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/handler"
    "github.com/kubewharf/katalyst-core/pkg/controller/lifecycle/agent-healthz/helper"
    "github.com/kubewharf/katalyst-core/pkg/metrics"
    "github.com/kubewharf/katalyst-core/pkg/util/native"
)

const AgentHealthzControllerName = "agent-healthz"

const metricsNameHealthState = "health_state"

const (
    stateNormal            = "Normal"
    stateFullDisruption    = "FullDisruption"
    statePartialDisruption = "PartialDisruption"
)

// HealthzController taints CNRs and evicts pods on nodes whose katalyst agents are
// reported as unhealthy, rate-limited and guarded by cluster-wide disruption thresholds.
type HealthzController struct {
    ctx     context.Context
    emitter metrics.MetricEmitter

    nodeLister       corelisters.NodeLister
    cnrLister        listers.CustomNodeResourceLister
    nodeListerSynced cache.InformerSynced
    podListerSynced  cache.InformerSynced
    cnrListerSynced  cache.InformerSynced

    taintThreshold     float32
    taintLimiterQPS    float32
    evictThreshold     float32
    evictionLimiterQPS float32
    nodeSelector       labels.Selector

    taintQueue *scheduler.RateLimitedTimedQueue
    evictQueue *scheduler.RateLimitedTimedQueue

    taintHelper   *helper.CNRTaintHelper
    evictHelper   *helper.EvictHelper
    healthzHelper *helper.HealthzHelper
    handlers      map[string]handler.AgentHandler
}
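// NewHealthzController builds the agent-healthz controller from the lifecycle configuration
// and the shared node/pod/CNR informers. A minimal wiring sketch (illustrative only; how the
// configurations, client set and informer factories are constructed is outside this file):
//
//    ec, err := NewHealthzController(ctx, genericConf, nil, lifecycleConf, clientSet,
//        nodeInformer, podInformer, cnrInformer, emitter)
//    if err != nil {
//        klog.Fatalf("failed to create %s controller: %v", AgentHealthzControllerName, err)
//    }
//    go ec.Run()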
func NewHealthzController(ctx context.Context,
    genericConf *generic.GenericConfiguration,
    _ *controller.GenericControllerConfiguration,
    conf *controller.LifeCycleConfig,
    client *client.GenericClientSet,
    nodeInformer coreinformers.NodeInformer,
    podInformer coreinformers.PodInformer,
    cnrInformer informers.CustomNodeResourceInformer,
    metricsEmitter metrics.MetricEmitter,
) (*HealthzController, error) {
    ec := &HealthzController{
        ctx:     ctx,
        emitter: metricsEmitter,

        taintThreshold:     conf.DisruptionTaintThreshold,
        taintLimiterQPS:    conf.TaintQPS,
        evictThreshold:     conf.DisruptionEvictThreshold,
        evictionLimiterQPS: conf.EvictQPS,
        nodeSelector:       conf.NodeSelector,

        taintQueue: scheduler.NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(conf.TaintQPS, scheduler.EvictionRateLimiterBurst)),
        evictQueue: scheduler.NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(conf.EvictQPS, scheduler.EvictionRateLimiterBurst)),

        handlers: make(map[string]handler.AgentHandler),
    }

    var (
        cnrControl control.CNRControl = control.DummyCNRControl{}
        podControl control.PodEjector = control.DummyPodEjector{}
    )
    // in dry-run mode, keep the dummy implementations so that taints and evictions
    // are computed but never actually applied
    if !genericConf.DryRun && !conf.DryRun {
        cnrControl = control.NewCNRControlImpl(client.InternalClient)
        podControl = control.NewRealPodEjector(client.KubeClient)
    }

    ec.nodeListerSynced = nodeInformer.Informer().HasSynced
    ec.nodeLister = nodeInformer.Lister()

    ec.cnrLister = cnrInformer.Lister()
    ec.cnrListerSynced = cnrInformer.Informer().HasSynced

    ec.podListerSynced = podInformer.Informer().HasSynced
    podIndexer := podInformer.Informer().GetIndexer()

    if err := native.AddNodeNameIndexerForPod(podInformer); err != nil {
        return nil, err
    }

    if metricsEmitter == nil {
        ec.emitter = metrics.DummyMetrics{}
    } else {
        ec.emitter = metricsEmitter.WithTags("agent-healthz")
    }

    ec.healthzHelper = helper.NewHealthzHelper(ctx, conf, ec.emitter, ec.nodeSelector, conf.AgentSelector, podIndexer, ec.nodeLister, ec.cnrLister)
    ec.taintHelper = helper.NewTaintHelper(ctx, ec.emitter, cnrControl, ec.nodeLister, ec.cnrLister, ec.taintQueue, ec.healthzHelper)
    ec.evictHelper = helper.NewEvictHelper(ctx, ec.emitter, podControl, ec.nodeLister, ec.cnrLister, ec.evictQueue, ec.healthzHelper)

    // resolve a handler for each watched agent: use the explicitly configured handler
    // if it is registered, and fall back to the generic handler otherwise
    registeredHandlerFuncs := handler.GetRegisterAgentHandlerFuncs()
    for agent := range conf.AgentSelector {
        initFunc := handler.NewGenericAgentHandler

        if handlerName, handlerExist := conf.AgentHandlers[agent]; handlerExist {
            if handlerFunc, funcExist := registeredHandlerFuncs[handlerName]; funcExist {
                initFunc = handlerFunc
            }
        }

        ec.handlers[agent] = initFunc(ctx, agent, ec.emitter, genericConf, conf,
            ec.nodeSelector, podIndexer, ec.nodeLister, ec.cnrLister, ec.healthzHelper)
    }

    native.SetPodTransformer(podTransformerFunc)
    return ec, nil
}
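// Run waits for the node, pod and CNR informer caches to sync, starts the periodic
// agent health check (every 30 seconds) together with the healthz/taint/evict helpers,
// and then blocks until the controller context is cancelled.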
func (ec *HealthzController) Run() {
    defer utilruntime.HandleCrash()
    defer klog.Infof("shutting down %s controller", AgentHealthzControllerName)

    if !cache.WaitForCacheSync(ec.ctx.Done(), ec.nodeListerSynced, ec.cnrListerSynced, ec.podListerSynced) {
        utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", AgentHealthzControllerName))
        return
    }
    klog.Infof("caches are synced for %s controller", AgentHealthzControllerName)

    go wait.Until(ec.syncAgentHealth, time.Second*30, ec.ctx.Done())
    ec.healthzHelper.Run()
    ec.taintHelper.Run()
    ec.evictHelper.Run()
    <-ec.ctx.Done()
}

// syncAgentHealth is the main health-checking logic: it collects taint and eviction
// candidates from all agent handlers and pushes the to-be-handled nodes into the
// rate-limited taint and evict queues.
func (ec *HealthzController) syncAgentHealth() {
    nodes, err := ec.nodeLister.List(ec.nodeSelector)
    if err != nil {
        klog.Errorf("failed to list nodes with selector %s: %v", ec.nodeSelector.String(), err)
        return
    }

    taints := make(map[string]*helper.CNRTaintItem)
    evicts := make(map[string]*helper.EvictItem)
    for _, node := range nodes {
        for _, h := range ec.handlers {
            if item, ok := h.GetCNRTaintInfo(node.Name); ok && item != nil && item.Taints != nil {
                if _, exist := taints[node.Name]; !exist {
                    taints[node.Name] = &helper.CNRTaintItem{
                        Taints: make(map[string]*apis.Taint),
                    }
                }

                for t, taint := range item.Taints {
                    taints[node.Name].Taints[t] = taint
                }
            }

            if item, ok := h.GetEvictionInfo(node.Name); ok && item != nil && len(item.PodKeys) > 0 {
                if _, exist := evicts[node.Name]; !exist {
                    evicts[node.Name] = &helper.EvictItem{
                        PodKeys: make(map[string][]string),
                    }
                }

                for agent, pods := range item.PodKeys {
                    evicts[node.Name].PodKeys[agent] = pods
                }
            }
        }
    }

    klog.Infof("we need to taint %v nodes and evict pods from %v nodes in total", len(taints), len(evicts))

    taintState := ec.computeClusterState(len(nodes), len(taints), ec.taintThreshold)
    ec.handleTaintDisruption(taintState)
    for _, node := range nodes {
        if item, ok := taints[node.Name]; ok {
            ec.taintQueue.Add(node.Name, item)
        } else {
            ec.taintQueue.Remove(node.Name)
            if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
                return ec.taintHelper.TryUNTaintCNR(node.Name)
            }); err != nil {
                klog.Infof("untaint node %v err: %v", node.Name, err)
            }
        }
    }

    evictState := ec.computeClusterState(len(nodes), len(evicts), ec.evictThreshold)
    ec.handleEvictDisruption(evictState)
    for _, node := range nodes {
        if _, ok := evicts[node.Name]; ok {
            ec.evictQueue.Add(node.Name, evicts[node.Name])
        } else {
            ec.evictQueue.Remove(node.Name)
        }
    }
}

// computeClusterState aggregates the health of the watched nodes into a single state:
//   - stateFullDisruption if there are no ready nodes at all,
//   - statePartialDisruption if more than two nodes are unhealthy and the unhealthy
//     fraction exceeds the given threshold,
//   - stateNormal otherwise.
func (ec *HealthzController) computeClusterState(readyNodes, notReadyNodes int, threshold float32) string {
    switch {
    case readyNodes == 0 && notReadyNodes > 0:
        return stateFullDisruption
    case notReadyNodes > 2 && float32(notReadyNodes)/float32(notReadyNodes+readyNodes) > threshold:
        return statePartialDisruption
    default:
        return stateNormal
    }
}
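// For illustration (made-up numbers), with a 0.3 disruption threshold:
//
//    ec.computeClusterState(10, 5, 0.3) // 5/(5+10) ≈ 0.33 > 0.3 and 5 > 2 → statePartialDisruption
//    ec.computeClusterState(10, 2, 0.3) // no more than two unhealthy nodes → stateNormal
//    ec.computeClusterState(0, 3, 0.3)  // no ready nodes at all            → stateFullDisruption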
// handleTaintDisruption is used as protection logic: if a large fraction of the cluster
// falls into an unhealthy state, something else is probably wrong, so we hold off tainting.
func (ec *HealthzController) handleTaintDisruption(healthState string) {
    if healthState == stateFullDisruption || healthState == statePartialDisruption {
        ec.taintQueue.SwapLimiter(0)
    } else {
        ec.taintQueue.SwapLimiter(ec.taintLimiterQPS)
    }

    _ = ec.emitter.StoreInt64(metricsNameHealthState, 1, metrics.MetricTypeNameRaw,
        []metrics.MetricTag{
            {Key: "action", Val: "taint"},
            {Key: "status", Val: healthState},
            {Key: "threshold", Val: fmt.Sprintf("%v", ec.taintThreshold)},
        }...)
    klog.Infof("controller detects taint state for nodes: %v", healthState)
}

// handleEvictDisruption is used as protection logic: if a large fraction of the cluster
// falls into an unhealthy state, something else is probably wrong, so we hold off evicting.
func (ec *HealthzController) handleEvictDisruption(healthState string) {
    if healthState == stateFullDisruption || healthState == statePartialDisruption {
        ec.evictQueue.SwapLimiter(0)
    } else {
        ec.evictQueue.SwapLimiter(ec.evictionLimiterQPS)
    }

    _ = ec.emitter.StoreInt64(metricsNameHealthState, 1, metrics.MetricTypeNameRaw,
        []metrics.MetricTag{
            {Key: "action", Val: "evict"},
            {Key: "status", Val: healthState},
            {Key: "threshold", Val: fmt.Sprintf("%v", ec.evictThreshold)},
        }...)
    klog.Infof("controller detects evict state for nodes: %v", healthState)
}

// podTransformerFunc copies only the pod fields this controller relies on:
// the node name, container names, and container readiness.
func podTransformerFunc(src, dest *corev1.Pod) {
    dest.Spec.NodeName = src.Spec.NodeName
    containersTransformerFunc(&src.Spec.Containers, &dest.Spec.Containers)
    containerStatusesTransformerFunc(&src.Status.ContainerStatuses, &dest.Status.ContainerStatuses)
}

func containersTransformerFunc(src, dst *[]corev1.Container) {
    if src == nil || len(*src) == 0 {
        return
    }

    if len(*dst) == 0 {
        *dst = make([]corev1.Container, len(*src))
    }

    for i, c := range *src {
        (*dst)[i].Name = c.Name
    }
}

func containerStatusesTransformerFunc(src, dst *[]corev1.ContainerStatus) {
    if src == nil || len(*src) == 0 {
        return
    }

    if len(*dst) == 0 {
        *dst = make([]corev1.ContainerStatus, len(*src))
    }

    for i, c := range *src {
        (*dst)[i].Name = c.Name
        (*dst)[i].Ready = c.Ready
    }
}
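// A minimal sketch of podTransformerFunc's effect (illustrative only, not part of the package):
//
//    src := &corev1.Pod{
//        Spec:   corev1.PodSpec{NodeName: "node-1", Containers: []corev1.Container{{Name: "main", Image: "busybox"}}},
//        Status: corev1.PodStatus{ContainerStatuses: []corev1.ContainerStatus{{Name: "main", Ready: true, RestartCount: 3}}},
//    }
//    dest := &corev1.Pod{}
//    podTransformerFunc(src, dest)
//    // dest keeps only NodeName "node-1", the container name "main", and Ready=true;
//    // Image, RestartCount and all other fields are dropped from the transformed copy.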