github.com/cilium/cilium@v1.16.2/operator/cmd/cilium_node.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package cmd
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"strings"
    10  	"sync"
    11  
    12  	"k8s.io/apimachinery/pkg/api/errors"
    13  	meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  	"k8s.io/client-go/tools/cache"
    15  	"k8s.io/client-go/util/workqueue"
    16  
    17  	"github.com/cilium/cilium/pkg/ipam/allocator"
    18  	cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    19  	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
    20  	"github.com/cilium/cilium/pkg/k8s/informer"
    21  	"github.com/cilium/cilium/pkg/k8s/utils"
    22  	"github.com/cilium/cilium/pkg/kvstore/store"
    23  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    24  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    25  	"github.com/cilium/cilium/pkg/option"
    26  )
    27  
    28  // ciliumNodeName is only used to implement NamedKey interface.
    29  type ciliumNodeName struct {
    30  	cluster string
    31  	name    string
    32  }
    33  
    34  func (c *ciliumNodeName) GetKeyName() string {
    35  	return nodeTypes.GetKeyNodeName(c.cluster, c.name)
    36  }
    37  
// ciliumNodeManagerQueueSyncedKey is a marker item placed on the node manager
// workqueue once the CiliumNode informer cache sync wait has returned. When
// processNextWorkItem dequeues it, ciliumNodeManagerQueueSynced is closed
// instead of invoking a sync handler.
type ciliumNodeManagerQueueSyncedKey struct{}
    39  
    40  type ciliumNodeSynchronizer struct {
    41  	clientset   k8sClient.Clientset
    42  	nodeManager allocator.NodeEventHandler
    43  	withKVStore bool
    44  
    45  	// ciliumNodeStore contains all CiliumNodes present in k8s.
    46  	ciliumNodeStore cache.Store
    47  
    48  	k8sCiliumNodesCacheSynced    chan struct{}
    49  	ciliumNodeManagerQueueSynced chan struct{}
    50  }
    51  
    52  func newCiliumNodeSynchronizer(clientset k8sClient.Clientset, nodeManager allocator.NodeEventHandler, withKVStore bool) *ciliumNodeSynchronizer {
    53  	return &ciliumNodeSynchronizer{
    54  		clientset:   clientset,
    55  		nodeManager: nodeManager,
    56  		withKVStore: withKVStore,
    57  
    58  		k8sCiliumNodesCacheSynced:    make(chan struct{}),
    59  		ciliumNodeManagerQueueSynced: make(chan struct{}),
    60  	}
    61  }
    62  
    63  func (s *ciliumNodeSynchronizer) Start(ctx context.Context, wg *sync.WaitGroup) error {
    64  	var (
    65  		ciliumNodeKVStore      *store.SharedStore
    66  		err                    error
    67  		nodeManagerSyncHandler func(key string) error
    68  		kvStoreSyncHandler     func(key string) error
    69  		connectedToKVStore     = make(chan struct{})
    70  
    71  		resourceEventHandler   = cache.ResourceEventHandlerFuncs{}
    72  		ciliumNodeManagerQueue = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
    73  		kvStoreQueue           = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
    74  	)
    75  
    76  	// KVStore is enabled -> we will run the event handler to sync objects into
    77  	// KVStore.
    78  	if s.withKVStore {
    79  		// Connect to the KVStore asynchronously so that we are able to start
    80  		// the operator without relying on the KVStore to be up.
    81  		// Start a goroutine to GC all CiliumNodes from the KVStore that are
    82  		// no longer running.
    83  		wg.Add(1)
    84  		go func() {
    85  			defer wg.Done()
    86  
    87  			log.Info("Starting to synchronize CiliumNode custom resources to KVStore")
    88  
    89  			ciliumNodeKVStore, err = store.JoinSharedStore(store.Configuration{
    90  				Prefix:     nodeStore.NodeStorePrefix,
    91  				KeyCreator: nodeStore.KeyCreator,
    92  			})
    93  
    94  			if err != nil {
    95  				log.WithError(err).Fatal("Unable to setup node watcher")
    96  			}
    97  			close(connectedToKVStore)
    98  
    99  			<-s.k8sCiliumNodesCacheSynced
   100  			// Since we processed all events received from k8s we know that
   101  			// at this point the list in ciliumNodeStore should be the source of
   102  			// truth and we need to delete all nodes in the kvNodeStore that are
   103  			// *not* present in the ciliumNodeStore.
   104  			listOfCiliumNodes := s.ciliumNodeStore.ListKeys()
   105  
   106  			kvStoreNodes := ciliumNodeKVStore.SharedKeysMap()
   107  
   108  			for _, ciliumNode := range listOfCiliumNodes {
   109  				// The remaining kvStoreNodes are leftovers that need to be GCed
   110  				kvStoreNodeName := nodeTypes.GetKeyNodeName(option.Config.ClusterName, ciliumNode)
   111  				delete(kvStoreNodes, kvStoreNodeName)
   112  			}
   113  
   114  			if len(listOfCiliumNodes) == 0 && len(kvStoreNodes) != 0 {
   115  				log.Warn("Preventing GC of nodes in the KVStore due the nonexistence of any CiliumNodes in kube-apiserver")
   116  				return
   117  			}
   118  
   119  			for _, kvStoreNode := range kvStoreNodes {
   120  				// Only delete the nodes that belong to our cluster
   121  				if strings.HasPrefix(kvStoreNode.GetKeyName(), option.Config.ClusterName) {
   122  					ciliumNodeKVStore.DeleteLocalKey(ctx, kvStoreNode)
   123  				}
   124  			}
   125  		}()
   126  	} else {
   127  		log.Info("Starting to synchronize CiliumNode custom resources")
   128  	}
   129  
   130  	if s.nodeManager != nil {
   131  		nodeManagerSyncHandler = s.syncHandlerConstructor(
   132  			func(node *cilium_v2.CiliumNode) {
   133  				s.nodeManager.Delete(node)
   134  			},
   135  			func(node *cilium_v2.CiliumNode) {
   136  				// node is deep copied before it is stored in pkg/aws/eni
   137  				s.nodeManager.Upsert(node)
   138  			})
   139  	}
   140  
   141  	if s.withKVStore {
   142  		kvStoreSyncHandler = s.syncHandlerConstructor(
   143  			func(node *cilium_v2.CiliumNode) {
   144  				nodeDel := ciliumNodeName{
   145  					cluster: option.Config.ClusterName,
   146  					name:    node.Name,
   147  				}
   148  				ciliumNodeKVStore.DeleteLocalKey(ctx, &nodeDel)
   149  			},
   150  			func(node *cilium_v2.CiliumNode) {
   151  				nodeNew := nodeTypes.ParseCiliumNode(node)
   152  				ciliumNodeKVStore.UpdateKeySync(ctx, &nodeNew, false)
   153  			})
   154  	}
   155  
   156  	// If both nodeManager and KVStore are nil, then we don't need to handle
   157  	// any watcher events, but we will need to keep all CiliumNodes in
   158  	// memory because 'ciliumNodeStore' is used across the operator
   159  	// to get the latest state of a CiliumNode.
   160  	if s.withKVStore || s.nodeManager != nil {
   161  		resourceEventHandler = cache.ResourceEventHandlerFuncs{
   162  			AddFunc: func(obj interface{}) {
   163  				key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   164  				if err != nil {
   165  					log.WithError(err).Warning("Unable to process CiliumNode Add event")
   166  					return
   167  				}
   168  				if s.nodeManager != nil {
   169  					ciliumNodeManagerQueue.Add(key)
   170  				}
   171  				if s.withKVStore {
   172  					kvStoreQueue.Add(key)
   173  				}
   174  			},
   175  			UpdateFunc: func(oldObj, newObj interface{}) {
   176  				if oldNode := informer.CastInformerEvent[cilium_v2.CiliumNode](oldObj); oldNode != nil {
   177  					if newNode := informer.CastInformerEvent[cilium_v2.CiliumNode](newObj); newNode != nil {
   178  						if oldNode.DeepEqual(newNode) {
   179  							return
   180  						}
   181  						key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(newObj)
   182  						if err != nil {
   183  							log.WithError(err).Warning("Unable to process CiliumNode Update event")
   184  							return
   185  						}
   186  						if s.nodeManager != nil {
   187  							ciliumNodeManagerQueue.Add(key)
   188  						}
   189  						if s.withKVStore {
   190  							kvStoreQueue.Add(key)
   191  						}
   192  					} else {
   193  						log.Warningf("Unknown CiliumNode object type %T received: %+v", newNode, newNode)
   194  					}
   195  				} else {
   196  					log.Warningf("Unknown CiliumNode object type %T received: %+v", oldNode, oldNode)
   197  				}
   198  			},
   199  			DeleteFunc: func(obj interface{}) {
   200  				key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   201  				if err != nil {
   202  					log.WithError(err).Warning("Unable to process CiliumNode Delete event")
   203  					return
   204  				}
   205  				if s.nodeManager != nil {
   206  					ciliumNodeManagerQueue.Add(key)
   207  				}
   208  				if s.withKVStore {
   209  					kvStoreQueue.Add(key)
   210  				}
   211  			},
   212  		}
   213  	}
   214  
   215  	// TODO: The operator is currently storing a full copy of the
   216  	// CiliumNode resource, as the resource grows, we may want to consider
   217  	// introducing a slim version of it.
   218  	var ciliumNodeInformer cache.Controller
   219  	s.ciliumNodeStore, ciliumNodeInformer = informer.NewInformer(
   220  		utils.ListerWatcherFromTyped[*cilium_v2.CiliumNodeList](s.clientset.CiliumV2().CiliumNodes()),
   221  		&cilium_v2.CiliumNode{},
   222  		0,
   223  		resourceEventHandler,
   224  		nil,
   225  	)
   226  
   227  	wg.Add(1)
   228  	go func() {
   229  		defer wg.Done()
   230  
   231  		cache.WaitForCacheSync(ctx.Done(), ciliumNodeInformer.HasSynced)
   232  		close(s.k8sCiliumNodesCacheSynced)
   233  		ciliumNodeManagerQueue.Add(ciliumNodeManagerQueueSyncedKey{})
   234  		log.Info("CiliumNodes caches synced with Kubernetes")
   235  		// Only handle events if nodeManagerSyncHandler is not nil. If it is nil
   236  		// then there isn't any event handler set for CiliumNodes events.
   237  		if nodeManagerSyncHandler != nil {
   238  			go func() {
   239  				// infinite loop. run in a goroutine to unblock code execution
   240  				for s.processNextWorkItem(ciliumNodeManagerQueue, nodeManagerSyncHandler) {
   241  				}
   242  			}()
   243  		}
   244  		// Start handling events for KVStore **after** nodeManagerSyncHandler
   245  		// otherwise Cilium Operator will block until the KVStore is available.
   246  		// This might be problematic in clusters that have etcd-operator with
   247  		// cluster-pool ipam mode because they depend on Cilium Operator to be
   248  		// running and handling IP Addresses with nodeManagerSyncHandler.
   249  		// Only handle events if kvStoreSyncHandler is not nil. If it is nil
   250  		// then there isn't any event handler set for CiliumNodes events.
   251  		if s.withKVStore && kvStoreSyncHandler != nil {
   252  			<-connectedToKVStore
   253  			log.Info("Connected to the KVStore, syncing CiliumNodes to the KVStore")
   254  			// infinite loop it will block code execution
   255  			for s.processNextWorkItem(kvStoreQueue, kvStoreSyncHandler) {
   256  			}
   257  		}
   258  	}()
   259  
   260  	wg.Add(1)
   261  	go func() {
   262  		defer wg.Done()
   263  		defer kvStoreQueue.ShutDown()
   264  		defer ciliumNodeManagerQueue.ShutDown()
   265  
   266  		ciliumNodeInformer.Run(ctx.Done())
   267  	}()
   268  
   269  	return nil
   270  }
   271  
   272  func (s *ciliumNodeSynchronizer) syncHandlerConstructor(notFoundHandler func(node *cilium_v2.CiliumNode), foundHandler func(node *cilium_v2.CiliumNode)) func(key string) error {
   273  	return func(key string) error {
   274  		_, name, err := cache.SplitMetaNamespaceKey(key)
   275  		if err != nil {
   276  			log.WithError(err).Error("Unable to process CiliumNode event")
   277  			return err
   278  		}
   279  		obj, exists, err := s.ciliumNodeStore.GetByKey(name)
   280  
   281  		// Delete handling
   282  		if !exists || errors.IsNotFound(err) {
   283  			notFoundHandler(&cilium_v2.CiliumNode{
   284  				ObjectMeta: meta_v1.ObjectMeta{
   285  					Name: name,
   286  				},
   287  			})
   288  			return nil
   289  		}
   290  		if err != nil {
   291  			log.WithError(err).Warning("Unable to retrieve CiliumNode from watcher store")
   292  			return err
   293  		}
   294  		cn, ok := obj.(*cilium_v2.CiliumNode)
   295  		if !ok {
   296  			tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
   297  			if !ok {
   298  				return fmt.Errorf("couldn't get object from tombstone %T", obj)
   299  			}
   300  			cn, ok = tombstone.Obj.(*cilium_v2.CiliumNode)
   301  			if !ok {
   302  				return fmt.Errorf("tombstone contained object that is not a *cilium_v2.CiliumNode %T", obj)
   303  			}
   304  		}
   305  		if cn.DeletionTimestamp != nil {
   306  			notFoundHandler(cn)
   307  			return nil
   308  		}
   309  		foundHandler(cn)
   310  		return nil
   311  	}
   312  }
   313  
   314  // processNextWorkItem process all events from the workqueue.
   315  func (s *ciliumNodeSynchronizer) processNextWorkItem(queue workqueue.RateLimitingInterface, syncHandler func(key string) error) bool {
   316  	key, quit := queue.Get()
   317  	if quit {
   318  		return false
   319  	}
   320  	defer queue.Done(key)
   321  
   322  	if _, ok := key.(ciliumNodeManagerQueueSyncedKey); ok {
   323  		close(s.ciliumNodeManagerQueueSynced)
   324  		return true
   325  	}
   326  
   327  	err := syncHandler(key.(string))
   328  	if err == nil {
   329  		// If err is nil we can forget it from the queue, if it is not nil
   330  		// the queue handler will retry to process this key until it succeeds.
   331  		queue.Forget(key)
   332  		return true
   333  	}
   334  
   335  	log.WithError(err).Errorf("sync %q failed with %v", key, err)
   336  	queue.AddRateLimited(key)
   337  
   338  	return true
   339  }
   340  
// ciliumNodeUpdateImplementation issues CiliumNode create, get and update
// requests through the CiliumV2 client of clientset.
type ciliumNodeUpdateImplementation struct {
	clientset k8sClient.Clientset
}
   344  
   345  func (c *ciliumNodeUpdateImplementation) Create(node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) {
   346  	return c.clientset.CiliumV2().CiliumNodes().Create(context.TODO(), node, meta_v1.CreateOptions{})
   347  }
   348  
   349  func (c *ciliumNodeUpdateImplementation) Get(node string) (*cilium_v2.CiliumNode, error) {
   350  	return c.clientset.CiliumV2().CiliumNodes().Get(context.TODO(), node, meta_v1.GetOptions{})
   351  }
   352  
   353  func (c *ciliumNodeUpdateImplementation) UpdateStatus(origNode, node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) {
   354  	if origNode == nil || !origNode.Status.DeepEqual(&node.Status) {
   355  		return c.clientset.CiliumV2().CiliumNodes().UpdateStatus(context.TODO(), node, meta_v1.UpdateOptions{})
   356  	}
   357  	return nil, nil
   358  }
   359  
   360  func (c *ciliumNodeUpdateImplementation) Update(origNode, node *cilium_v2.CiliumNode) (*cilium_v2.CiliumNode, error) {
   361  	if origNode == nil || !origNode.Spec.DeepEqual(&node.Spec) {
   362  		return c.clientset.CiliumV2().CiliumNodes().Update(context.TODO(), node, meta_v1.UpdateOptions{})
   363  	}
   364  	return nil, nil
   365  }