github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/lifecycle/cnr.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package lifecycle 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 corev1 "k8s.io/api/core/v1" 25 apiequality "k8s.io/apimachinery/pkg/api/equality" 26 "k8s.io/apimachinery/pkg/api/errors" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 "k8s.io/apimachinery/pkg/util/wait" 31 coreinformers "k8s.io/client-go/informers/core/v1" 32 corelisters "k8s.io/client-go/listers/core/v1" 33 "k8s.io/client-go/tools/cache" 34 "k8s.io/client-go/util/workqueue" 35 "k8s.io/klog/v2" 36 37 apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" 38 informers "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions/node/v1alpha1" 39 listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" 40 "github.com/kubewharf/katalyst-core/pkg/client" 41 "github.com/kubewharf/katalyst-core/pkg/client/control" 42 "github.com/kubewharf/katalyst-core/pkg/config/controller" 43 "github.com/kubewharf/katalyst-core/pkg/config/generic" 44 "github.com/kubewharf/katalyst-core/pkg/metrics" 45 "github.com/kubewharf/katalyst-core/pkg/util/general" 46 ) 47 48 const ( 49 cnrLifecycleControllerName = "cnr-lifecycle" 50 cnrLifeCycleWorkerCount = 1 51 ) 52 53 const ( 54 clearCNRPeriod = 30 * time.Second 55 ) 56 57 type CNRLifecycle struct { 58 ctx context.Context 59 60 client *client.GenericClientSet 61 cnrControl control.CNRControl 62 63 nodeListerSynced cache.InformerSynced 64 nodeLister corelisters.NodeLister 65 cnrListerSynced cache.InformerSynced 66 cnrLister listers.CustomNodeResourceLister 67 68 // queue for node 69 syncQueue workqueue.RateLimitingInterface 70 71 // metricsEmitter for emit metrics 72 metricsEmitter metrics.MetricEmitter 73 } 74 75 func NewCNRLifecycle(ctx context.Context, 76 genericConf *generic.GenericConfiguration, 77 _ *controller.GenericControllerConfiguration, 78 _ *controller.CNRLifecycleConfig, 79 client *client.GenericClientSet, 80 nodeInformer coreinformers.NodeInformer, 81 cnrInformer informers.CustomNodeResourceInformer, 82 metricsEmitter metrics.MetricEmitter, 83 ) (*CNRLifecycle, error) { 84 cnrLifecycle := &CNRLifecycle{ 85 ctx: ctx, 86 client: client, 87 syncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), 88 cnrLifecycleControllerName), 89 } 90 91 nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 92 AddFunc: cnrLifecycle.addNodeEventHandle, 93 UpdateFunc: cnrLifecycle.updateNodeEventHandle, 94 }) 95 cnrLifecycle.nodeListerSynced = nodeInformer.Informer().HasSynced 96 cnrLifecycle.nodeLister = nodeInformer.Lister() 97 98 cnrInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 99 AddFunc: cnrLifecycle.addCNREventHandle, 100 UpdateFunc: cnrLifecycle.updateCNREventHandle, 101 DeleteFunc: cnrLifecycle.deleteCNREventHandle, 102 }) 103 cnrLifecycle.cnrLister = cnrInformer.Lister() 104 cnrLifecycle.cnrListerSynced = cnrInformer.Informer().HasSynced 105 106 if metricsEmitter == nil { 107 cnrLifecycle.metricsEmitter = metrics.DummyMetrics{} 108 } else { 109 cnrLifecycle.metricsEmitter = metricsEmitter.WithTags(cnrLifecycleControllerName) 110 } 111 112 cnrLifecycle.cnrControl = control.DummyCNRControl{} 113 if !genericConf.DryRun { 114 cnrLifecycle.cnrControl = control.NewCNRControlImpl(client.InternalClient) 115 } 116 117 return cnrLifecycle, nil 118 } 119 120 func (cl *CNRLifecycle) Run() { 121 defer utilruntime.HandleCrash() 122 defer cl.syncQueue.ShutDown() 123 124 defer klog.Infof("Shutting down %s controller", cnrLifecycleControllerName) 125 126 if !cache.WaitForCacheSync(cl.ctx.Done(), cl.nodeListerSynced, cl.cnrListerSynced) { 127 utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", cnrLifecycleControllerName)) 128 return 129 } 130 klog.Infof("Caches are synced for %s controller", cnrLifecycleControllerName) 131 klog.Infof("start %d workers for %s controller", cnrLifeCycleWorkerCount, cnrLifecycleControllerName) 132 133 go wait.Until(cl.clearUnexpectedCNR, clearCNRPeriod, cl.ctx.Done()) 134 for i := 0; i < cnrLifeCycleWorkerCount; i++ { 135 go wait.Until(cl.worker, time.Second, cl.ctx.Done()) 136 } 137 138 <-cl.ctx.Done() 139 } 140 141 func (cl *CNRLifecycle) addNodeEventHandle(obj interface{}) { 142 n, ok := obj.(*corev1.Node) 143 if !ok { 144 klog.Errorf("cannot convert obj to *corev1.Node: %v", obj) 145 return 146 } 147 klog.V(4).Infof("notice addition of Node %s", n.Name) 148 cl.enqueueWorkItem(n) 149 } 150 151 func (cl *CNRLifecycle) updateNodeEventHandle(old, cur interface{}) { 152 oldNode, ok := old.(*corev1.Node) 153 if !ok { 154 klog.Errorf("cannot convert oldObj to *corev1.Node: %v", old) 155 return 156 } 157 158 curNode, ok := cur.(*corev1.Node) 159 if !ok { 160 klog.Errorf("cannot convert curObj to *corev1.Node: %v", cur) 161 return 162 } 163 164 if curNode.Labels == nil { 165 return 166 } 167 168 if !general.CheckMapEqual(oldNode.Labels, curNode.Labels) { 169 cl.enqueueWorkItem(curNode) 170 } 171 } 172 173 func (cl *CNRLifecycle) addCNREventHandle(obj interface{}) { 174 c, ok := obj.(*apis.CustomNodeResource) 175 if !ok { 176 klog.Errorf("cannot convert obj to *apis.CNR: %v", obj) 177 return 178 } 179 klog.V(4).Infof("notice addition of cnr %s", c.Name) 180 181 cl.enqueueWorkItem(obj) 182 } 183 184 func (cl *CNRLifecycle) updateCNREventHandle(_, new interface{}) { 185 c, ok := new.(*apis.CustomNodeResource) 186 if !ok { 187 klog.Errorf("cannot convert newObj to *apis.CNR: %v", c) 188 return 189 } 190 klog.V(4).Infof("notice addition of cnr %s", c.Name) 191 192 cl.enqueueWorkItem(new) 193 } 194 195 func (cl *CNRLifecycle) deleteCNREventHandle(obj interface{}) { 196 c, ok := obj.(*apis.CustomNodeResource) 197 if !ok { 198 klog.Errorf("cannot convert oldObj to *apis.CNR: %v", c) 199 return 200 } 201 klog.V(4).Infof("notice addition of cnr %s", c.Name) 202 203 cl.enqueueWorkItem(obj) 204 } 205 206 func (cl *CNRLifecycle) worker() { 207 for cl.processNextWorkItem() { 208 } 209 } 210 211 // processNextWorkItem dequeues items, processes them, and marks them done. 212 // It enforces that the sync is never invoked concurrently with the same key. 213 func (cl *CNRLifecycle) processNextWorkItem() bool { 214 key, quit := cl.syncQueue.Get() 215 if quit { 216 return false 217 } 218 defer cl.syncQueue.Done(key) 219 220 err := cl.sync(key.(string)) 221 if err == nil { 222 cl.syncQueue.Forget(key) 223 return true 224 } 225 226 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 227 cl.syncQueue.AddRateLimited(key) 228 229 return true 230 } 231 232 // enqueueWorkItem enqueues the given node in the work queue. 233 func (cl *CNRLifecycle) enqueueWorkItem(obj interface{}) { 234 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 235 if err != nil { 236 utilruntime.HandleError(fmt.Errorf("Cound't get key for object %+v: %v", obj, err)) 237 return 238 } 239 cl.syncQueue.Add(key) 240 } 241 242 // sync syncs the given node. 243 func (cl *CNRLifecycle) sync(key string) error { 244 _, name, err := cache.SplitMetaNamespaceKey(key) 245 if err != nil { 246 return err 247 } 248 node, err := cl.nodeLister.Get(name) 249 if errors.IsNotFound(err) { 250 klog.Infof("node has been deleted %v", key) 251 return nil 252 } 253 if err != nil { 254 return err 255 } 256 257 err = cl.updateOrCreateCNR(node) 258 if err != nil { 259 return err 260 } 261 262 return nil 263 } 264 265 // clearUnexpectedCNR is used to clear unexpected cnr 266 // for instance, orphaned cnr due to unexpected node deletion options or manually creation 267 func (cl *CNRLifecycle) clearUnexpectedCNR() { 268 targetCNRSelector := labels.Everything() 269 cnrs, err := cl.cnrLister.List(targetCNRSelector) 270 if err != nil { 271 klog.Errorf("failed to list all cnr") 272 return 273 } 274 275 for _, cnr := range cnrs { 276 _, err := cl.nodeLister.Get(cnr.Name) 277 if errors.IsNotFound(err) { 278 // double check if this node is deleted 279 _, nErr := cl.client.KubeClient.CoreV1().Nodes().Get(cl.ctx, cnr.Name, metav1.GetOptions{ResourceVersion: "0"}) 280 if !errors.IsNotFound(nErr) { 281 continue 282 } 283 284 if dErr := cl.cnrControl.DeleteCNR(cl.ctx, cnr.Name); dErr != nil { 285 klog.Errorf("delete unexpected cnr %s failed: %v", cnr.Name, dErr) 286 } 287 continue 288 } else if err != nil { 289 klog.Errorf("get node for CNR %v failed in clear: %v", cnr.Name, err) 290 continue 291 } 292 } 293 } 294 295 func (cl *CNRLifecycle) updateOrCreateCNR(node *corev1.Node) error { 296 cnr, err := cl.cnrLister.Get(node.Name) 297 if err != nil && !errors.IsNotFound(err) { 298 return fmt.Errorf("failed to get cnr from lister %s: %v", node.Name, err) 299 } 300 if errors.IsNotFound(err) { 301 cnr = &apis.CustomNodeResource{ 302 ObjectMeta: metav1.ObjectMeta{ 303 Name: node.Name, 304 Labels: node.Labels, 305 }, 306 } 307 308 setCNROwnerReference(cnr, node) 309 _, err = cl.cnrControl.CreateCNR(cl.ctx, cnr) 310 if err != nil && !errors.IsAlreadyExists(err) { 311 return fmt.Errorf("failed to create cnr %s: %v", cnr.Name, err) 312 } 313 if errors.IsAlreadyExists(err) { 314 cnr, err = cl.client.InternalClient.NodeV1alpha1().CustomNodeResources().Get(cl.ctx, node.Name, metav1.GetOptions{ResourceVersion: "0"}) 315 if err != nil { 316 return fmt.Errorf("failed to get cnr from apiserver %s: %v", node.Name, err) 317 } 318 } 319 } 320 321 newCNR := cnr.DeepCopy() 322 newCNR.Labels = general.MergeMap(newCNR.Labels, node.Labels) 323 setCNROwnerReference(newCNR, node) 324 if apiequality.Semantic.DeepEqual(newCNR, cnr) { 325 return nil 326 } 327 328 _, err = cl.cnrControl.PatchCNRSpecAndMetadata(cl.ctx, cnr.Name, cnr, newCNR) 329 return err 330 } 331 332 func setCNROwnerReference(cnr *apis.CustomNodeResource, node *corev1.Node) { 333 if cnr == nil || node == nil { 334 return 335 } 336 337 blocker := true 338 cnr.OwnerReferences = []metav1.OwnerReference{ 339 { 340 APIVersion: "v1", 341 Kind: "Node", 342 Name: node.Name, 343 UID: node.GetUID(), 344 Controller: &blocker, 345 BlockOwnerDeletion: &blocker, 346 }, 347 } 348 }