github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/cnc.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package lifecycle 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 corev1 "k8s.io/api/core/v1" 25 apiequality "k8s.io/apimachinery/pkg/api/equality" 26 "k8s.io/apimachinery/pkg/api/errors" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 "k8s.io/apimachinery/pkg/util/wait" 31 coreinformers "k8s.io/client-go/informers/core/v1" 32 corelisters "k8s.io/client-go/listers/core/v1" 33 "k8s.io/client-go/tools/cache" 34 "k8s.io/client-go/util/workqueue" 35 "k8s.io/klog/v2" 36 37 apis "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" 38 configinformers "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions/config/v1alpha1" 39 configlisters "github.com/kubewharf/katalyst-api/pkg/client/listers/config/v1alpha1" 40 "github.com/kubewharf/katalyst-core/pkg/client" 41 "github.com/kubewharf/katalyst-core/pkg/client/control" 42 "github.com/kubewharf/katalyst-core/pkg/config/controller" 43 "github.com/kubewharf/katalyst-core/pkg/config/generic" 44 "github.com/kubewharf/katalyst-core/pkg/metrics" 45 "github.com/kubewharf/katalyst-core/pkg/util/general" 46 ) 47 48 const ( 49 cncLifecycleControllerName = "cnc-lifecycle" 50 cncLifeCycleWorkerCount = 1 51 ) 52 53 const ( 54 clearCNCPeriod = 30 * time.Second 55 ) 56 57 type CNCLifecycle struct { 58 ctx context.Context 59 60 client *client.GenericClientSet 61 cncControl control.CNCControl 62 63 nodeListerSynced cache.InformerSynced 64 nodeLister corelisters.NodeLister 65 cncListerSynced cache.InformerSynced 66 cncLister configlisters.CustomNodeConfigLister 67 68 // queue for node 69 syncQueue workqueue.RateLimitingInterface 70 71 // metricsEmitter for emit metrics 72 metricsEmitter metrics.MetricEmitter 73 } 74 75 func NewCNCLifecycle(ctx context.Context, 76 genericConf *generic.GenericConfiguration, 77 _ *controller.GenericControllerConfiguration, 78 _ *controller.CNCLifecycleConfig, 79 client *client.GenericClientSet, 80 nodeInformer coreinformers.NodeInformer, 81 cncInformer configinformers.CustomNodeConfigInformer, 82 metricsEmitter metrics.MetricEmitter, 83 ) (*CNCLifecycle, error) { 84 cncLifecycle := &CNCLifecycle{ 85 ctx: ctx, 86 client: client, 87 syncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), 88 cncLifecycleControllerName), 89 } 90 91 nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 92 AddFunc: cncLifecycle.addNodeEventHandle, 93 UpdateFunc: cncLifecycle.updateNodeEventHandle, 94 }) 95 cncLifecycle.nodeListerSynced = nodeInformer.Informer().HasSynced 96 cncLifecycle.nodeLister = nodeInformer.Lister() 97 98 cncInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 99 AddFunc: cncLifecycle.addCNCEventHandle, 100 UpdateFunc: cncLifecycle.updateCNCEventHandle, 101 DeleteFunc: cncLifecycle.deleteCNCEventHandle, 102 }) 103 cncLifecycle.cncListerSynced = cncInformer.Informer().HasSynced 104 cncLifecycle.cncLister = cncInformer.Lister() 105 106 if metricsEmitter == nil { 107 cncLifecycle.metricsEmitter = metrics.DummyMetrics{} 108 } else { 109 cncLifecycle.metricsEmitter = metricsEmitter.WithTags(cncLifecycleControllerName) 110 } 111 112 cncLifecycle.cncControl = control.DummyCNCControl{} 113 if !genericConf.DryRun { 114 cncLifecycle.cncControl = control.NewRealCNCControl(client.InternalClient) 115 } 116 117 return cncLifecycle, nil 118 } 119 120 func (cl *CNCLifecycle) Run() { 121 defer utilruntime.HandleCrash() 122 defer cl.syncQueue.ShutDown() 123 124 defer klog.Infof("Shutting down %s controller", cncLifecycleControllerName) 125 126 if !cache.WaitForCacheSync(cl.ctx.Done(), cl.nodeListerSynced, cl.cncListerSynced) { 127 utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", cncLifecycleControllerName)) 128 return 129 } 130 klog.Infof("Caches are synced for %s controller", cncLifecycleControllerName) 131 klog.Infof("start %d workers for %s controller", cncLifeCycleWorkerCount, cncLifecycleControllerName) 132 133 go wait.Until(cl.clearUnexpectedCNC, clearCNCPeriod, cl.ctx.Done()) 134 for i := 0; i < cncLifeCycleWorkerCount; i++ { 135 go wait.Until(cl.worker, time.Second, cl.ctx.Done()) 136 } 137 138 <-cl.ctx.Done() 139 } 140 141 func (cl *CNCLifecycle) addNodeEventHandle(obj interface{}) { 142 n, ok := obj.(*corev1.Node) 143 if !ok { 144 klog.Errorf("cannot convert obj to *corev1.Node: %v", obj) 145 return 146 } 147 klog.V(4).Infof("notice addition of Node %s", n.Name) 148 cl.enqueueWorkItem(n) 149 } 150 151 func (cl *CNCLifecycle) updateNodeEventHandle(old, cur interface{}) { 152 oldNode, ok := old.(*corev1.Node) 153 if !ok { 154 klog.Errorf("cannot convert oldObj to *corev1.Node: %v", old) 155 return 156 } 157 158 curNode, ok := cur.(*corev1.Node) 159 if !ok { 160 klog.Errorf("cannot convert curObj to *corev1.Node: %v", cur) 161 return 162 } 163 164 if curNode.Labels == nil { 165 return 166 } 167 168 if !general.CheckMapEqual(oldNode.Labels, curNode.Labels) { 169 cl.enqueueWorkItem(curNode) 170 } 171 } 172 173 func (cl *CNCLifecycle) addCNCEventHandle(obj interface{}) { 174 c, ok := obj.(*apis.CustomNodeConfig) 175 if !ok { 176 klog.Errorf("cannot convert obj to *apis.CustomNodeConfig: %v", obj) 177 return 178 } 179 klog.V(4).Infof("notice addition of cnc %s", c.Name) 180 181 cl.enqueueWorkItem(obj) 182 } 183 184 func (cl *CNCLifecycle) updateCNCEventHandle(_, new interface{}) { 185 c, ok := new.(*apis.CustomNodeConfig) 186 if !ok { 187 klog.Errorf("cannot convert oldObj to *apis.CustomNodeConfig: %v", c) 188 return 189 } 190 klog.V(4).Infof("notice addition of cnc %s", c.Name) 191 192 cl.enqueueWorkItem(new) 193 } 194 195 func (cl *CNCLifecycle) deleteCNCEventHandle(obj interface{}) { 196 c, ok := obj.(*apis.CustomNodeConfig) 197 if !ok { 198 klog.Errorf("cannot convert oldObj to *apis.CNC: %v", c) 199 return 200 } 201 klog.V(4).Infof("notice addition of cnc %s", c.Name) 202 203 cl.enqueueWorkItem(obj) 204 } 205 206 func (cl *CNCLifecycle) worker() { 207 for cl.processNextWorkItem() { 208 } 209 } 210 211 // processNextWorkItem dequeues items, processes them, and marks them done. 212 // It enforces that the sync is never invoked concurrently with the same key. 213 func (cl *CNCLifecycle) processNextWorkItem() bool { 214 key, quit := cl.syncQueue.Get() 215 if quit { 216 return false 217 } 218 defer cl.syncQueue.Done(key) 219 220 err := cl.sync(key.(string)) 221 if err == nil { 222 cl.syncQueue.Forget(key) 223 return true 224 } 225 226 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 227 cl.syncQueue.AddRateLimited(key) 228 229 return true 230 } 231 232 // enqueueWorkItem enqueues the given node in the work queue. 233 func (cl *CNCLifecycle) enqueueWorkItem(obj interface{}) { 234 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 235 if err != nil { 236 utilruntime.HandleError(fmt.Errorf("Cound't get key for object %+v: %v", obj, err)) 237 return 238 } 239 cl.syncQueue.Add(key) 240 } 241 242 // sync syncs the given node. 243 func (cl *CNCLifecycle) sync(key string) error { 244 _, name, err := cache.SplitMetaNamespaceKey(key) 245 if err != nil { 246 return err 247 } 248 node, err := cl.nodeLister.Get(name) 249 if errors.IsNotFound(err) { 250 klog.Infof("node has been deleted %v", key) 251 return nil 252 } 253 if err != nil { 254 return err 255 } 256 257 err = cl.updateOrCreateCNC(node) 258 if err != nil { 259 return err 260 } 261 262 return nil 263 } 264 265 // clearUnexpectedCNC is used to clear unexpected cnc 266 // for instance, orphaned cnc due to unexpected node deletion options or manually creation 267 func (cl *CNCLifecycle) clearUnexpectedCNC() { 268 targetCNCSelector := labels.Everything() 269 cncs, err := cl.cncLister.List(targetCNCSelector) 270 if err != nil { 271 klog.Errorf("failed to list all cnc") 272 return 273 } 274 275 for _, cnc := range cncs { 276 _, err := cl.nodeLister.Get(cnc.Name) 277 if errors.IsNotFound(err) { 278 // double check if this node is deleted 279 _, nErr := cl.client.KubeClient.CoreV1().Nodes().Get(cl.ctx, cnc.Name, metav1.GetOptions{ResourceVersion: "0"}) 280 if !errors.IsNotFound(nErr) { 281 continue 282 } 283 284 if dErr := cl.cncControl.DeleteCNC(cl.ctx, cnc.Name, metav1.DeleteOptions{}); dErr != nil { 285 klog.Errorf("delete unexpected cnc %s failed: %v", cnc.Name, dErr) 286 } 287 continue 288 } else if err != nil { 289 klog.Errorf("get node for CNC %v failed in clear: %v", cnc.Name, err) 290 continue 291 } 292 } 293 } 294 295 func (cl *CNCLifecycle) updateOrCreateCNC(node *corev1.Node) error { 296 cnc, err := cl.cncLister.Get(node.Name) 297 if err != nil && !errors.IsNotFound(err) { 298 return fmt.Errorf("failed to get cnc from lister %s: %v", node.Name, err) 299 } 300 if errors.IsNotFound(err) { 301 cnc = &apis.CustomNodeConfig{ 302 ObjectMeta: metav1.ObjectMeta{ 303 Name: node.Name, 304 Labels: node.Labels, 305 }, 306 } 307 308 setCNCOwnerReference(cnc, node) 309 _, err = cl.cncControl.CreateCNC(cl.ctx, cnc, metav1.CreateOptions{}) 310 if err != nil && !errors.IsAlreadyExists(err) { 311 return fmt.Errorf("failed to create cnc %s: %v", cnc.Name, err) 312 } 313 if errors.IsAlreadyExists(err) { 314 cnc, err = cl.client.InternalClient.ConfigV1alpha1().CustomNodeConfigs().Get(cl.ctx, node.Name, metav1.GetOptions{ResourceVersion: "0"}) 315 if err != nil { 316 return fmt.Errorf("failed to get cnc from apiserver %s: %v", node.Name, err) 317 } 318 } 319 } 320 321 newCNC := cnc.DeepCopy() 322 newCNC.Labels = node.Labels 323 setCNCOwnerReference(newCNC, node) 324 if apiequality.Semantic.DeepEqual(newCNC, cnc) { 325 return nil 326 } 327 328 _, err = cl.cncControl.PatchCNC(cl.ctx, cnc.Name, cnc, newCNC) 329 return err 330 } 331 332 func setCNCOwnerReference(cnc *apis.CustomNodeConfig, node *corev1.Node) { 333 if cnc == nil || node == nil { 334 return 335 } 336 337 blocker := true 338 cnc.OwnerReferences = []metav1.OwnerReference{ 339 { 340 APIVersion: "v1", 341 Kind: "Node", 342 Name: node.Name, 343 UID: node.GetUID(), 344 Controller: &blocker, 345 BlockOwnerDeletion: &blocker, 346 }, 347 } 348 }