github.com/cilium/cilium@v1.16.2/operator/cmd/cilium_node.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package cmd 5 6 import ( 7 "context" 8 "fmt" 9 "strings" 10 "sync" 11 12 "k8s.io/apimachinery/pkg/api/errors" 13 meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 "k8s.io/client-go/tools/cache" 15 "k8s.io/client-go/util/workqueue" 16 17 "github.com/cilium/cilium/pkg/ipam/allocator" 18 cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 19 k8sClient "github.com/cilium/cilium/pkg/k8s/client" 20 "github.com/cilium/cilium/pkg/k8s/informer" 21 "github.com/cilium/cilium/pkg/k8s/utils" 22 "github.com/cilium/cilium/pkg/kvstore/store" 23 nodeStore "github.com/cilium/cilium/pkg/node/store" 24 nodeTypes "github.com/cilium/cilium/pkg/node/types" 25 "github.com/cilium/cilium/pkg/option" 26 ) 27 28 // ciliumNodeName is only used to implement NamedKey interface. 29 type ciliumNodeName struct { 30 cluster string 31 name string 32 } 33 34 func (c *ciliumNodeName) GetKeyName() string { 35 return nodeTypes.GetKeyNodeName(c.cluster, c.name) 36 } 37 38 type ciliumNodeManagerQueueSyncedKey struct{} 39 40 type ciliumNodeSynchronizer struct { 41 clientset k8sClient.Clientset 42 nodeManager allocator.NodeEventHandler 43 withKVStore bool 44 45 // ciliumNodeStore contains all CiliumNodes present in k8s. 46 ciliumNodeStore cache.Store 47 48 k8sCiliumNodesCacheSynced chan struct{} 49 ciliumNodeManagerQueueSynced chan struct{} 50 } 51 52 func newCiliumNodeSynchronizer(clientset k8sClient.Clientset, nodeManager allocator.NodeEventHandler, withKVStore bool) *ciliumNodeSynchronizer { 53 return &ciliumNodeSynchronizer{ 54 clientset: clientset, 55 nodeManager: nodeManager, 56 withKVStore: withKVStore, 57 58 k8sCiliumNodesCacheSynced: make(chan struct{}), 59 ciliumNodeManagerQueueSynced: make(chan struct{}), 60 } 61 } 62 63 func (s *ciliumNodeSynchronizer) Start(ctx context.Context, wg *sync.WaitGroup) error { 64 var ( 65 ciliumNodeKVStore *store.SharedStore 66 err error 67 nodeManagerSyncHandler func(key string) error 68 kvStoreSyncHandler func(key string) error 69 connectedToKVStore = make(chan struct{}) 70 71 resourceEventHandler = cache.ResourceEventHandlerFuncs{} 72 ciliumNodeManagerQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 73 kvStoreQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) 74 ) 75 76 // KVStore is enabled -> we will run the event handler to sync objects into 77 // KVStore. 78 if s.withKVStore { 79 // Connect to the KVStore asynchronously so that we are able to start 80 // the operator without relying on the KVStore to be up. 81 // Start a goroutine to GC all CiliumNodes from the KVStore that are 82 // no longer running. 83 wg.Add(1) 84 go func() { 85 defer wg.Done() 86 87 log.Info("Starting to synchronize CiliumNode custom resources to KVStore") 88 89 ciliumNodeKVStore, err = store.JoinSharedStore(store.Configuration{ 90 Prefix: nodeStore.NodeStorePrefix, 91 KeyCreator: nodeStore.KeyCreator, 92 }) 93 94 if err != nil { 95 log.WithError(err).Fatal("Unable to setup node watcher") 96 } 97 close(connectedToKVStore) 98 99 <-s.k8sCiliumNodesCacheSynced 100 // Since we processed all events received from k8s we know that 101 // at this point the list in ciliumNodeStore should be the source of 102 // truth and we need to delete all nodes in the kvNodeStore that are 103 // *not* present in the ciliumNodeStore. 104 listOfCiliumNodes := s.ciliumNodeStore.ListKeys() 105 106 kvStoreNodes := ciliumNodeKVStore.SharedKeysMap() 107 108 for _, ciliumNode := range listOfCiliumNodes { 109 // The remaining kvStoreNodes are leftovers that need to be GCed 110 kvStoreNodeName := nodeTypes.GetKeyNodeName(option.Config.ClusterName, ciliumNode) 111 delete(kvStoreNodes, kvStoreNodeName) 112 } 113 114 if len(listOfCiliumNodes) == 0 && len(kvStoreNodes) != 0 { 115 log.Warn("Preventing GC of nodes in the KVStore due the nonexistence of any CiliumNodes in kube-apiserver") 116 return 117 } 118 119 for _, kvStoreNode := range kvStoreNodes { 120 // Only delete the nodes that belong to our cluster 121 if strings.HasPrefix(kvStoreNode.GetKeyName(), option.Config.ClusterName) { 122 ciliumNodeKVStore.DeleteLocalKey(ctx, kvStoreNode) 123 } 124 } 125 }() 126 } else { 127 log.Info("Starting to synchronize CiliumNode custom resources") 128 } 129 130 if s.nodeManager != nil { 131 nodeManagerSyncHandler = s.syncHandlerConstructor( 132 func(node *cilium_v2.CiliumNode) { 133 s.nodeManager.Delete(node) 134 }, 135 func(node *cilium_v2.CiliumNode) { 136 // node is deep copied before it is stored in pkg/aws/eni 137 s.nodeManager.Upsert(node) 138 }) 139 } 140 141 if s.withKVStore { 142 kvStoreSyncHandler = s.syncHandlerConstructor( 143 func(node *cilium_v2.CiliumNode) { 144 nodeDel := ciliumNodeName{ 145 cluster: option.Config.ClusterName, 146 name: node.Name, 147 } 148 ciliumNodeKVStore.DeleteLocalKey(ctx, &nodeDel) 149 }, 150 func(node *cilium_v2.CiliumNode) { 151 nodeNew := nodeTypes.ParseCiliumNode(node) 152 ciliumNodeKVStore.UpdateKeySync(ctx, &nodeNew, false) 153 }) 154 } 155 156 // If both nodeManager and KVStore are nil, then we don't need to handle 157 // any watcher events, but we will need to keep all CiliumNodes in 158 // memory because 'ciliumNodeStore' is used across the operator 159 // to get the latest state of a CiliumNode. 160 if s.withKVStore || s.nodeManager != nil { 161 resourceEventHandler = cache.ResourceEventHandlerFuncs{ 162 AddFunc: func(obj interface{}) { 163 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 164 if err != nil { 165 log.WithError(err).Warning("Unable to process CiliumNode Add event") 166 return 167 } 168 if s.nodeManager != nil { 169 ciliumNodeManagerQueue.Add(key) 170 } 171 if s.withKVStore { 172 kvStoreQueue.Add(key) 173 } 174 }, 175 UpdateFunc: func(oldObj, newObj interface{}) { 176 if oldNode := informer.CastInformerEvent[cilium_v2.CiliumNode](oldObj); oldNode != nil { 177 if newNode := informer.CastInformerEvent[cilium_v2.CiliumNode](newObj); newNode != nil { 178 if oldNode.DeepEqual(newNode) { 179 return 180 } 181 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(newObj) 182 if err != nil { 183 log.WithError(err).Warning("Unable to process CiliumNode Update event") 184 return 185 } 186 if s.nodeManager != nil { 187 ciliumNodeManagerQueue.Add(key) 188 } 189 if s.withKVStore { 190 kvStoreQueue.Add(key) 191 } 192 } else { 193 log.Warningf("Unknown CiliumNode object type %T received: %+v", newNode, newNode) 194 } 195 } else { 196 log.Warningf("Unknown CiliumNode object type %T received: %+v", oldNode, oldNode) 197 } 198 }, 199 DeleteFunc: func(obj interface{}) { 200 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 201 if err != nil { 202 log.WithError(err).Warning("Unable to process CiliumNode Delete event") 203 return 204 } 205 if s.nodeManager != nil { 206 ciliumNodeManagerQueue.Add(key) 207 } 208 if s.withKVStore { 209 kvStoreQueue.Add(key) 210 } 211 }, 212 } 213 } 214 215 // TODO: The operator is currently storing a full copy of the 216 // CiliumNode resource, as the resource grows, we may want to consider 217 // introducing a slim version of it. 218 var ciliumNodeInformer cache.Controller 219 s.ciliumNodeStore, ciliumNodeInformer = informer.NewInformer( 220 utils.ListerWatcherFromTyped[*cilium_v2.CiliumNodeList](s.clientset.CiliumV2().CiliumNodes()), 221 &cilium_v2.CiliumNode{}, 222 0, 223 resourceEventHandler, 224 nil, 225 ) 226 227 wg.Add(1) 228 go func() { 229 defer wg.Done() 230 231 cache.WaitForCacheSync(ctx.Done(), ciliumNodeInformer.HasSynced) 232 close(s.k8sCiliumNodesCacheSynced) 233 ciliumNodeManagerQueue.Add(ciliumNodeManagerQueueSyncedKey{}) 234 log.Info("CiliumNodes caches synced with Kubernetes") 235 // Only handle events if nodeManagerSyncHandler is not nil. If it is nil 236 // then there isn't any event handler set for CiliumNodes events. 237 if nodeManagerSyncHandler != nil { 238 go func() { 239 // infinite loop. run in a goroutine to unblock code execution 240 for s.processNextWorkItem(ciliumNodeManagerQueue, nodeManagerSyncHandler) { 241 } 242 }() 243 } 244 // Start handling events for KVStore **after** nodeManagerSyncHandler 245 // otherwise Cilium Operator will block until the KVStore is available. 246 // This might be problematic in clusters that have etcd-operator with 247 // cluster-pool ipam mode because they depend on Cilium Operator to be 248 // running and handling IP Addresses with nodeManagerSyncHandler. 249 // Only handle events if kvStoreSyncHandler is not nil. If it is nil 250 // then there isn't any event handler set for CiliumNodes events. 251 if s.withKVStore && kvStoreSyncHandler != nil { 252 <-connectedToKVStore 253 log.Info("Connected to the KVStore, syncing CiliumNodes to the KVStore") 254 // infinite loop it will block code execution 255 for s.processNextWorkItem(kvStoreQueue, kvStoreSyncHandler) { 256 } 257 } 258 }() 259 260 wg.Add(1) 261 go func() { 262 defer wg.Done() 263 defer kvStoreQueue.ShutDown() 264 defer ciliumNodeManagerQueue.ShutDown() 265 266 ciliumNodeInformer.Run(ctx.Done()) 267 }() 268 269 return nil 270 } 271 272 func (s *ciliumNodeSynchronizer) syncHandlerConstructor(notFoundHandler func(node *cilium_v2.CiliumNode), foundHandler func(node *cilium_v2.CiliumNode)) func(key string) error { 273 return func(key string) error { 274 _, name, err := cache.SplitMetaNamespaceKey(key) 275 if err != nil { 276 log.WithError(err).Error("Unable to process CiliumNode event") 277 return err 278 } 279 obj, exists, err := s.ciliumNodeStore.GetByKey(name) 280 281 // Delete handling 282 if !exists || errors.IsNotFound(err) { 283 notFoundHandler(&cilium_v2.CiliumNode{ 284 ObjectMeta: meta_v1.ObjectMeta{ 285 Name: name, 286 }, 287 }) 288 return nil 289 } 290 if err != nil { 291 log.WithError(err).Warning("Unable to retrieve CiliumNode from watcher store") 292 return err 293 } 294 cn, ok := obj.(*cilium_v2.CiliumNode) 295 if !ok { 296 tombstone, ok := obj.(cache.DeletedFinalStateUnknown) 297 if !ok { 298 return fmt.Errorf("couldn't get object from tombstone %T", obj) 299 } 300 cn, ok = tombstone.Obj.(*cilium_v2.CiliumNode) 301 if !ok { 302 return fmt.Errorf("tombstone contained object that is not a *cilium_v2.CiliumNode %T", obj) 303 } 304 } 305 if cn.DeletionTimestamp != nil { 306 notFoundHandler(cn) 307 return nil 308 } 309 foundHandler(cn) 310 return nil 311 } 312 } 313 314 // processNextWorkItem process all events from the workqueue. 315 func (s *ciliumNodeSynchronizer) processNextWorkItem(queue workqueue.RateLimitingInterface, syncHandler func(key string) error) bool { 316 key, quit := queue.Get() 317 if quit { 318 return false 319 } 320 defer queue.Done(key) 321 322 if _, ok := key.(ciliumNodeManagerQueueSyncedKey); ok { 323 close(s.ciliumNodeManagerQueueSynced) 324 return true 325 } 326 327 err := syncHandler(key.(string)) 328 if err == nil { 329 // If err is nil we can forget it from the queue, if it is not nil 330 // the queue handler will retry to process this key until it succeeds. 331 queue.Forget(key) 332 return true 333 } 334 335 log.WithError(err).Errorf("sync %q failed with %v", key, err) 336 queue.AddRateLimited(key) 337 338 return true 339 } 340 341 type ciliumNodeUpdateImplementation struct { 342 clientset k8sClient.Clientset 343 } 344 345 func (c *ciliumNodeUpdateImplementation) Create(node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) { 346 return c.clientset.CiliumV2().CiliumNodes().Create(context.TODO(), node, meta_v1.CreateOptions{}) 347 } 348 349 func (c *ciliumNodeUpdateImplementation) Get(node string) (*cilium_v2.CiliumNode, error) { 350 return c.clientset.CiliumV2().CiliumNodes().Get(context.TODO(), node, meta_v1.GetOptions{}) 351 } 352 353 func (c *ciliumNodeUpdateImplementation) UpdateStatus(origNode, node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) { 354 if origNode == nil || !origNode.Status.DeepEqual(&node.Status) { 355 return c.clientset.CiliumV2().CiliumNodes().UpdateStatus(context.TODO(), node, meta_v1.UpdateOptions{}) 356 } 357 return nil, nil 358 } 359 360 func (c *ciliumNodeUpdateImplementation) Update(origNode, node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) { 361 if origNode == nil || !origNode.Spec.DeepEqual(&node.Spec) { 362 return c.clientset.CiliumV2().CiliumNodes().Update(context.TODO(), node, meta_v1.UpdateOptions{}) 363 } 364 return nil, nil 365 }