github.com/cilium/cilium@v1.16.2/pkg/k8s/watchers/watcher.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package watchers
     5  
     6  import (
     7  	"context"
     8  	"net"
     9  	"net/netip"
    10  	"sync"
    11  
    12  	"github.com/sirupsen/logrus"
    13  	"k8s.io/apimachinery/pkg/util/runtime"
    14  
    15  	"github.com/cilium/cilium/pkg/endpoint"
    16  	"github.com/cilium/cilium/pkg/ipcache"
    17  	ipcacheTypes "github.com/cilium/cilium/pkg/ipcache/types"
    18  	"github.com/cilium/cilium/pkg/k8s"
    19  	cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    20  	"github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
    21  	"github.com/cilium/cilium/pkg/k8s/client"
    22  	slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
    23  	"github.com/cilium/cilium/pkg/k8s/synced"
    24  	"github.com/cilium/cilium/pkg/k8s/watchers/resources"
    25  	"github.com/cilium/cilium/pkg/labels"
    26  	"github.com/cilium/cilium/pkg/loadbalancer"
    27  	"github.com/cilium/cilium/pkg/logging"
    28  	"github.com/cilium/cilium/pkg/logging/logfields"
    29  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    30  	"github.com/cilium/cilium/pkg/option"
    31  	"github.com/cilium/cilium/pkg/redirectpolicy"
    32  	"github.com/cilium/cilium/pkg/source"
    33  )
    34  
    35  const (
    36  	k8sAPIGroupNamespaceV1Core                  = "core/v1::Namespace"
    37  	K8sAPIGroupServiceV1Core                    = "core/v1::Service"
    38  	k8sAPIGroupNetworkingV1Core                 = "networking.k8s.io/v1::NetworkPolicy"
    39  	k8sAPIGroupCiliumNetworkPolicyV2            = "cilium/v2::CiliumNetworkPolicy"
    40  	k8sAPIGroupCiliumClusterwideNetworkPolicyV2 = "cilium/v2::CiliumClusterwideNetworkPolicy"
    41  	k8sAPIGroupCiliumCIDRGroupV2Alpha1          = "cilium/v2alpha1::CiliumCIDRGroup"
    42  	k8sAPIGroupCiliumNodeV2                     = "cilium/v2::CiliumNode"
    43  	k8sAPIGroupCiliumEndpointV2                 = "cilium/v2::CiliumEndpoint"
    44  	k8sAPIGroupCiliumLocalRedirectPolicyV2      = "cilium/v2::CiliumLocalRedirectPolicy"
    45  	k8sAPIGroupCiliumEndpointSliceV2Alpha1      = "cilium/v2alpha1::CiliumEndpointSlice"
    46  	k8sAPIGroupCiliumEnvoyConfigV2              = "cilium/v2::CiliumEnvoyConfig"
    47  	k8sAPIGroupCiliumClusterwideEnvoyConfigV2   = "cilium/v2::CiliumClusterwideEnvoyConfig"
    48  
    49  	metricCLRP = "CiliumLocalRedirectPolicy"
    50  	metricPod  = "Pod"
    51  )
    52  
    53  func init() {
    54  	// Replace error handler with our own
    55  	runtime.ErrorHandlers = []func(error){
    56  		k8s.K8sErrorHandler,
    57  	}
    58  }
    59  
// log is the package-level logger; every entry it emits carries the
// "k8s-watcher" value in the subsystem field.
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "k8s-watcher")
    61  
    62  type endpointManager interface {
    63  	LookupCEPName(string) *endpoint.Endpoint
    64  	GetEndpoints() []*endpoint.Endpoint
    65  	GetHostEndpoint() *endpoint.Endpoint
    66  	GetEndpointsByPodName(string) []*endpoint.Endpoint
    67  	WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error
    68  	UpdatePolicyMaps(context.Context, *sync.WaitGroup) *sync.WaitGroup
    69  }
    70  
    71  type nodeManager interface {
    72  	NodeDeleted(n nodeTypes.Node)
    73  	NodeUpdated(n nodeTypes.Node)
    74  	NodeSync()
    75  }
    76  
// policyManager is the single-method dependency used to request policy
// updates; the caller passes a force flag and a human-readable reason.
type policyManager interface {
	TriggerPolicyUpdates(force bool, reason string)
}
    80  
    81  type svcManager interface {
    82  	DeleteService(frontend loadbalancer.L3n4Addr) (bool, error)
    83  	GetDeepCopyServiceByFrontend(frontend loadbalancer.L3n4Addr) (*loadbalancer.SVC, bool)
    84  	UpsertService(*loadbalancer.SVC) (bool, loadbalancer.ID, error)
    85  }
    86  
    87  type redirectPolicyManager interface {
    88  	AddRedirectPolicy(config redirectpolicy.LRPConfig) (bool, error)
    89  	DeleteRedirectPolicy(config redirectpolicy.LRPConfig) error
    90  	OnAddService(svcID k8s.ServiceID)
    91  	OnDeleteService(svcID k8s.ServiceID)
    92  	OnUpdatePod(pod *slim_corev1.Pod, needsReassign bool, ready bool)
    93  	OnDeletePod(pod *slim_corev1.Pod)
    94  	OnAddPod(pod *slim_corev1.Pod)
    95  }
    96  
    97  type bgpSpeakerManager interface {
    98  	OnUpdateService(svc *slim_corev1.Service) error
    99  	OnDeleteService(svc *slim_corev1.Service) error
   100  
   101  	OnUpdateEndpoints(eps *k8s.Endpoints) error
   102  }
   103  
   104  type cgroupManager interface {
   105  	OnAddPod(pod *slim_corev1.Pod)
   106  	OnUpdatePod(oldPod, newPod *slim_corev1.Pod)
   107  	OnDeletePod(pod *slim_corev1.Pod)
   108  }
   109  
   110  type CacheAccessK8SWatcher interface {
   111  	GetCachedNamespace(namespace string) (*slim_corev1.Namespace, error)
   112  	GetCachedPod(namespace, name string) (*slim_corev1.Pod, error)
   113  }
   114  
   115  type ipcacheManager interface {
   116  	// GH-21142: Re-evaluate the need for these APIs
   117  	Upsert(ip string, hostIP net.IP, hostKey uint8, k8sMeta *ipcache.K8sMetadata, newIdentity ipcache.Identity) (namedPortsChanged bool, err error)
   118  	LookupByIP(IP string) (ipcache.Identity, bool)
   119  	Delete(IP string, source source.Source) (namedPortsChanged bool)
   120  
   121  	UpsertLabels(prefix netip.Prefix, lbls labels.Labels, src source.Source, resource ipcacheTypes.ResourceID)
   122  	RemoveLabelsExcluded(lbls labels.Labels, toExclude map[netip.Prefix]struct{}, resource ipcacheTypes.ResourceID)
   123  	DeleteOnMetadataMatch(IP string, source source.Source, namespace, name string) (namedPortsChanged bool)
   124  }
   125  
   126  type K8sWatcher struct {
   127  	resourceGroupsFn func(cfg WatcherConfiguration) (resourceGroups, waitForCachesOnly []string)
   128  
   129  	clientset client.Clientset
   130  
   131  	k8sEventReporter          *K8sEventReporter
   132  	k8sPodWatcher             *K8sPodWatcher
   133  	k8sCiliumNodeWatcher      *K8sCiliumNodeWatcher
   134  	k8sNamespaceWatcher       *K8sNamespaceWatcher
   135  	k8sServiceWatcher         *K8sServiceWatcher
   136  	k8sEndpointsWatcher       *K8sEndpointsWatcher
   137  	k8sCiliumLRPWatcher       *K8sCiliumLRPWatcher
   138  	k8sCiliumEndpointsWatcher *K8sCiliumEndpointsWatcher
   139  
   140  	// k8sResourceSynced maps a resource name to a channel. Once the given
   141  	// resource name is synchronized with k8s, the channel for which that
   142  	// resource name maps to is closed.
   143  	k8sResourceSynced *synced.Resources
   144  
   145  	// k8sAPIGroups is a set of k8s API in use. They are setup in watchers,
   146  	// and may be disabled while the agent runs.
   147  	k8sAPIGroups *synced.APIGroups
   148  
   149  	cfg WatcherConfiguration
   150  }
   151  
   152  func newWatcher(
   153  	clientset client.Clientset,
   154  	k8sPodWatcher *K8sPodWatcher,
   155  	k8sCiliumNodeWatcher *K8sCiliumNodeWatcher,
   156  	k8sNamespaceWatcher *K8sNamespaceWatcher,
   157  	k8sServiceWatcher *K8sServiceWatcher,
   158  	k8sEndpointsWatcher *K8sEndpointsWatcher,
   159  	k8sCiliumLRPWatcher *K8sCiliumLRPWatcher,
   160  	k8sCiliumEndpointsWatcher *K8sCiliumEndpointsWatcher,
   161  	k8sEventReporter *K8sEventReporter,
   162  	k8sResourceSynced *synced.Resources,
   163  	k8sAPIGroups *synced.APIGroups,
   164  	cfg WatcherConfiguration,
   165  ) *K8sWatcher {
   166  	return &K8sWatcher{
   167  		resourceGroupsFn:          resourceGroups,
   168  		clientset:                 clientset,
   169  		k8sEventReporter:          k8sEventReporter,
   170  		k8sPodWatcher:             k8sPodWatcher,
   171  		k8sCiliumNodeWatcher:      k8sCiliumNodeWatcher,
   172  		k8sNamespaceWatcher:       k8sNamespaceWatcher,
   173  		k8sServiceWatcher:         k8sServiceWatcher,
   174  		k8sEndpointsWatcher:       k8sEndpointsWatcher,
   175  		k8sCiliumLRPWatcher:       k8sCiliumLRPWatcher,
   176  		k8sCiliumEndpointsWatcher: k8sCiliumEndpointsWatcher,
   177  		k8sResourceSynced:         k8sResourceSynced,
   178  		k8sAPIGroups:              k8sAPIGroups,
   179  		cfg:                       cfg,
   180  	}
   181  }
   182  
   183  // WaitForCacheSync blocks until the given resources have been synchronized from k8s.  Note that if
   184  // the controller for a resource has not been started, the wait for that resource returns
   185  // immediately. If it is required that the resource exists and is actually synchronized, the caller
   186  // must ensure the controller for that resource has been started before calling
   187  // WaitForCacheSync. For most resources this can be done by receiving from controllersStarted
   188  // channel (<-k.controllersStarted), which is closed after most watchers have been started.
   189  func (k *K8sWatcher) WaitForCacheSync(resourceNames ...string) {
   190  	k.k8sResourceSynced.WaitForCacheSync(resourceNames...)
   191  }
   192  
   193  func (k *K8sWatcher) GetAPIGroups() []string {
   194  	return k.k8sAPIGroups.GetGroups()
   195  }
   196  
   197  // WaitForCRDsToRegister will wait for the Cilium Operator to register the CRDs
   198  // with the apiserver. This step is required before launching the full K8s
   199  // watcher, as those resource controllers need the resources to be registered
   200  // with K8s first.
   201  func (k *K8sWatcher) WaitForCRDsToRegister(ctx context.Context) error {
   202  	return synced.SyncCRDs(ctx, k.clientset, synced.AgentCRDResourceNames(), k.k8sResourceSynced, k.k8sAPIGroups)
   203  }
   204  
   205  type watcherKind int
   206  
   207  const (
   208  	// skip causes watcher to not be started.
   209  	skip watcherKind = iota
   210  
   211  	// start causes watcher to be started as soon as possible.
   212  	start
   213  
   214  	// waitOnly will not start a watcher for this resource, but cause us to
   215  	// wait for an external go routine to initialize it
   216  	waitOnly
   217  )
   218  
   219  type watcherInfo struct {
   220  	kind  watcherKind
   221  	group string
   222  }
   223  
   224  var ciliumResourceToGroupMapping = map[string]watcherInfo{
   225  	synced.CRDResourceName(cilium_v2.CNPName):           {waitOnly, k8sAPIGroupCiliumNetworkPolicyV2},            // Handled in pkg/policy/k8s/
   226  	synced.CRDResourceName(cilium_v2.CCNPName):          {waitOnly, k8sAPIGroupCiliumClusterwideNetworkPolicyV2}, // Handled in pkg/policy/k8s/
   227  	synced.CRDResourceName(cilium_v2.CEPName):           {start, k8sAPIGroupCiliumEndpointV2},                    // ipcache
   228  	synced.CRDResourceName(cilium_v2.CNName):            {start, k8sAPIGroupCiliumNodeV2},
   229  	synced.CRDResourceName(cilium_v2.CIDName):           {skip, ""}, // Handled in pkg/k8s/identitybackend/
   230  	synced.CRDResourceName(cilium_v2.CLRPName):          {start, k8sAPIGroupCiliumLocalRedirectPolicyV2},
   231  	synced.CRDResourceName(cilium_v2.CEWName):           {skip, ""}, // Handled in clustermesh-apiserver/
   232  	synced.CRDResourceName(cilium_v2.CEGPName):          {skip, ""}, // Handled via Resource[T].
   233  	synced.CRDResourceName(v2alpha1.CESName):            {start, k8sAPIGroupCiliumEndpointSliceV2Alpha1},
   234  	synced.CRDResourceName(cilium_v2.CCECName):          {waitOnly, k8sAPIGroupCiliumClusterwideEnvoyConfigV2}, // Handled in pkg/ciliumenvoyconfig/
   235  	synced.CRDResourceName(cilium_v2.CECName):           {waitOnly, k8sAPIGroupCiliumEnvoyConfigV2},            // Handled in pkg/ciliumenvoyconfig/
   236  	synced.CRDResourceName(v2alpha1.BGPPName):           {skip, ""},                                            // Handled in BGP control plane
   237  	synced.CRDResourceName(v2alpha1.BGPCCName):          {skip, ""},                                            // Handled in BGP control plane
   238  	synced.CRDResourceName(v2alpha1.BGPAName):           {skip, ""},                                            // Handled in BGP control plane
   239  	synced.CRDResourceName(v2alpha1.BGPPCName):          {skip, ""},                                            // Handled in BGP control plane
   240  	synced.CRDResourceName(v2alpha1.BGPNCName):          {skip, ""},                                            // Handled in BGP control plane
   241  	synced.CRDResourceName(v2alpha1.BGPNCOName):         {skip, ""},                                            // Handled in BGP control plane
   242  	synced.CRDResourceName(v2alpha1.LBIPPoolName):       {skip, ""},                                            // Handled in LB IPAM
   243  	synced.CRDResourceName(v2alpha1.CNCName):            {skip, ""},                                            // Handled by init directly
   244  	synced.CRDResourceName(v2alpha1.CCGName):            {waitOnly, k8sAPIGroupCiliumCIDRGroupV2Alpha1},
   245  	synced.CRDResourceName(v2alpha1.L2AnnouncementName): {skip, ""}, // Handled by L2 announcement directly
   246  	synced.CRDResourceName(v2alpha1.CPIPName):           {skip, ""}, // Handled by multi-pool IPAM allocator
   247  }
   248  
   249  // resourceGroups are all of the core Kubernetes and Cilium resource groups
   250  // which the Cilium agent watches to implement CNI functionality.
   251  func resourceGroups(cfg WatcherConfiguration) (resourceGroups, waitForCachesOnly []string) {
   252  	k8sGroups := []string{
   253  		// To perform the service translation and have the BPF LB datapath
   254  		// with the right service -> backend (k8s endpoints) translation.
   255  		K8sAPIGroupServiceV1Core,
   256  
   257  		// Namespaces can contain labels which are essential for
   258  		// endpoints being restored to have the right identity.
   259  		k8sAPIGroupNamespaceV1Core,
   260  		// Pods can contain labels which are essential for endpoints
   261  		// being restored to have the right identity.
   262  		resources.K8sAPIGroupPodV1Core,
   263  		// To perform the service translation and have the BPF LB datapath
   264  		// with the right service -> backend (k8s endpoints) translation.
   265  		resources.K8sAPIGroupEndpointSliceOrEndpoint,
   266  	}
   267  
   268  	if cfg.K8sNetworkPolicyEnabled() {
   269  		// When the flag is set,
   270  		// We need all network policies in place before restoring to
   271  		// make sure we are enforcing the correct policies for each
   272  		// endpoint before restarting.
   273  		waitForCachesOnly = append(waitForCachesOnly, k8sAPIGroupNetworkingV1Core)
   274  	}
   275  
   276  	ciliumResources := synced.AgentCRDResourceNames()
   277  	ciliumGroups := make([]string, 0, len(ciliumResources))
   278  	for _, r := range ciliumResources {
   279  		groupInfo, ok := ciliumResourceToGroupMapping[r]
   280  		if !ok {
   281  			log.Fatalf("Unknown resource %s. Please update pkg/k8s/watchers to understand this type.", r)
   282  		}
   283  		switch groupInfo.kind {
   284  		case skip:
   285  			continue
   286  		case start:
   287  			ciliumGroups = append(ciliumGroups, groupInfo.group)
   288  		case waitOnly:
   289  			waitForCachesOnly = append(waitForCachesOnly, groupInfo.group)
   290  		}
   291  	}
   292  
   293  	return append(k8sGroups, ciliumGroups...), waitForCachesOnly
   294  }
   295  
   296  // InitK8sSubsystem takes a channel for which it will be closed when all
   297  // caches essential for daemon are synchronized.
   298  // It initializes the K8s subsystem and starts the watchers for the resources
   299  // that the daemon is interested in.
   300  // The cachesSynced channel is closed when all caches are synchronized.
   301  // To be called after WaitForCRDsToRegister() so that all needed CRDs have
   302  // already been registered.
   303  func (k *K8sWatcher) InitK8sSubsystem(ctx context.Context, cachesSynced chan struct{}) {
   304  	resources, cachesOnly := k.resourceGroupsFn(k.cfg)
   305  
   306  	log.Info("Enabling k8s event listener")
   307  	k.enableK8sWatchers(ctx, resources)
   308  	close(k.k8sPodWatcher.controllersStarted)
   309  
   310  	go func() {
   311  		log.Info("Waiting until all pre-existing resources have been received")
   312  		allResources := append(resources, cachesOnly...)
   313  		if err := k.k8sResourceSynced.WaitForCacheSyncWithTimeout(option.Config.K8sSyncTimeout, allResources...); err != nil {
   314  			log.WithError(err).Fatal("Timed out waiting for pre-existing resources to be received; exiting")
   315  		}
   316  		close(cachesSynced)
   317  	}()
   318  }
   319  
// WatcherConfiguration is the configuration consulted when deciding which
// resource groups to watch; it is passed to the K8sWatcher's resourceGroupsFn.
type WatcherConfiguration interface {
	// K8sNetworkPolicyEnabled returns true if cilium agent needs to support K8s NetworkPolicy
	K8sNetworkPolicyEnabled() bool
}
   325  
   326  // enableK8sWatchers starts watchers for given resources.
   327  func (k *K8sWatcher) enableK8sWatchers(ctx context.Context, resourceNames []string) {
   328  	if !k.clientset.IsEnabled() {
   329  		log.Debug("Not enabling k8s event listener because k8s is not enabled")
   330  		return
   331  	}
   332  	asyncControllers := &sync.WaitGroup{}
   333  
   334  	for _, r := range resourceNames {
   335  		switch r {
   336  		// Core Cilium
   337  		case resources.K8sAPIGroupPodV1Core:
   338  			asyncControllers.Add(1)
   339  			go k.k8sPodWatcher.podsInit(asyncControllers)
   340  		case k8sAPIGroupNamespaceV1Core:
   341  			k.k8sNamespaceWatcher.namespacesInit()
   342  		case k8sAPIGroupCiliumNodeV2:
   343  			asyncControllers.Add(1)
   344  			go k.k8sCiliumNodeWatcher.ciliumNodeInit(ctx, asyncControllers)
   345  		case resources.K8sAPIGroupServiceV1Core:
   346  			k.k8sServiceWatcher.servicesInit()
   347  		case resources.K8sAPIGroupEndpointSliceOrEndpoint:
   348  			k.k8sEndpointsWatcher.endpointsInit()
   349  		case k8sAPIGroupCiliumEndpointV2:
   350  			k.k8sCiliumEndpointsWatcher.initCiliumEndpointOrSlices(ctx, asyncControllers)
   351  		case k8sAPIGroupCiliumEndpointSliceV2Alpha1:
   352  			// no-op; handled in k8sAPIGroupCiliumEndpointV2
   353  		case k8sAPIGroupCiliumLocalRedirectPolicyV2:
   354  			k.k8sCiliumLRPWatcher.ciliumLocalRedirectPolicyInit()
   355  		default:
   356  			log.WithFields(logrus.Fields{
   357  				logfields.Resource: r,
   358  			}).Fatal("Not listening for Kubernetes resource updates for unhandled type")
   359  		}
   360  	}
   361  
   362  	asyncControllers.Wait()
   363  }
   364  
   365  func (k *K8sWatcher) StopWatcher() {
   366  	k.k8sNamespaceWatcher.stopWatcher()
   367  	k.k8sServiceWatcher.stopWatcher()
   368  	k.k8sEndpointsWatcher.stopWatcher()
   369  	k.k8sCiliumLRPWatcher.stopWatcher()
   370  }
   371  
   372  // K8sEventProcessed is called to do metrics accounting for each processed
   373  // Kubernetes event
   374  func (k *K8sWatcher) K8sEventProcessed(scope, action string, status bool) {
   375  	k.k8sEventReporter.K8sEventProcessed(scope, action, status)
   376  }
   377  
   378  // K8sEventReceived does metric accounting for each received Kubernetes event, as well
   379  // as notifying of events for k8s resources synced.
   380  func (k *K8sWatcher) K8sEventReceived(apiResourceName, scope, action string, valid, equal bool) {
   381  	k.k8sEventReporter.K8sEventReceived(apiResourceName, scope, action, valid, equal)
   382  }
   383  
   384  // GetCachedPod returns a pod from the local store.
   385  func (k *K8sWatcher) GetCachedPod(namespace, name string) (*slim_corev1.Pod, error) {
   386  	return k.k8sPodWatcher.GetCachedPod(namespace, name)
   387  }
   388  
   389  // GetCachedNamespace returns a namespace from the local store.
   390  func (k *K8sWatcher) GetCachedNamespace(namespace string) (*slim_corev1.Namespace, error) {
   391  	return k.k8sNamespaceWatcher.GetCachedNamespace(namespace)
   392  }
   393  
   394  func (k *K8sWatcher) RunK8sServiceHandler() {
   395  	k.k8sServiceWatcher.RunK8sServiceHandler()
   396  }