github.com/cilium/cilium@v1.16.2/pkg/k8s/watchers/endpoints.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package watchers
     5  
     6  import (
     7  	"context"
     8  	"net/netip"
     9  	"sync/atomic"
    10  
    11  	"github.com/cilium/hive/cell"
    12  
    13  	agentK8s "github.com/cilium/cilium/daemon/k8s"
    14  	"github.com/cilium/cilium/pkg/bgp/speaker"
    15  	"github.com/cilium/cilium/pkg/ipcache"
    16  	ipcacheTypes "github.com/cilium/cilium/pkg/ipcache/types"
    17  	"github.com/cilium/cilium/pkg/k8s"
    18  	"github.com/cilium/cilium/pkg/k8s/resource"
    19  	k8sSynced "github.com/cilium/cilium/pkg/k8s/synced"
    20  	"github.com/cilium/cilium/pkg/k8s/watchers/resources"
    21  	"github.com/cilium/cilium/pkg/labels"
    22  	"github.com/cilium/cilium/pkg/lock"
    23  	"github.com/cilium/cilium/pkg/source"
    24  )
    25  
// k8sEndpointsWatcherParams lists the dependencies handed to
// newK8sEndpointsWatcher. It embeds cell.In so that the hive framework can
// populate the fields.
type k8sEndpointsWatcherParams struct {
	cell.In

	// Resources provides the Endpoints event stream consumed by the watcher.
	Resources         agentK8s.Resources
	// K8sResourceSynced tracks sync state and event timestamps per API group.
	K8sResourceSynced *k8sSynced.Resources
	// K8sAPIGroups is the registry the watcher adds its API group to.
	K8sAPIGroups      *k8sSynced.APIGroups

	// ServiceCache receives endpoint updates and deletions.
	ServiceCache      *k8s.ServiceCache
	// MetalLBBgpSpeaker is notified of every endpoint upsert.
	MetalLBBgpSpeaker speaker.MetalLBBgpSpeaker
	// IPCache is where kube-apiserver backend IPs get labelled.
	IPCache           *ipcache.IPCache
}
    37  
    38  func newK8sEndpointsWatcher(params k8sEndpointsWatcherParams) *K8sEndpointsWatcher {
    39  	return &K8sEndpointsWatcher{
    40  		k8sResourceSynced: params.K8sResourceSynced,
    41  		k8sAPIGroups:      params.K8sAPIGroups,
    42  		resources:         params.Resources,
    43  		k8sSvcCache:       params.ServiceCache,
    44  		bgpSpeakerManager: params.MetalLBBgpSpeaker,
    45  		ipcache:           params.IPCache,
    46  		stop:              make(chan struct{}),
    47  	}
    48  }
    49  
// K8sEndpointsWatcher consumes Endpoints events from the agent's k8s
// resources and forwards them to the service cache, the BGP speaker and, for
// the default/kubernetes service, the ipcache.
type K8sEndpointsWatcher struct {
	// k8sResourceSynced maps a resource name to a channel. Once the given
	// resource name is synchronized with k8s, the channel for which that
	// resource name maps to is closed.
	k8sResourceSynced *k8sSynced.Resources
	// k8sAPIGroups is a set of k8s API in use. They are setup in watchers,
	// and may be disabled while the agent runs.
	k8sAPIGroups *k8sSynced.APIGroups
	// resources exposes the Endpoints event stream read by endpointsInit.
	resources    agentK8s.Resources

	// k8sSvcCache is updated on every endpoint upsert and delete.
	k8sSvcCache       *k8s.ServiceCache
	// bgpSpeakerManager is notified on every endpoint upsert.
	bgpSpeakerManager bgpSpeakerManager
	// ipcache receives the kube-apiserver label associations.
	ipcache           ipcacheManager

	// stop is closed by stopWatcher to request shutdown of the event loop.
	stop chan struct{}
}
    66  
    67  func (k *K8sEndpointsWatcher) endpointsInit() {
    68  	swg := lock.NewStoppableWaitGroup()
    69  
    70  	// Use EndpointSliceV1 API group for cache syncing regardless of the underlying
    71  	// real resource kind since the codepath is the same for all.
    72  	apiGroup := resources.K8sAPIGroupEndpointSliceOrEndpoint
    73  
    74  	var synced atomic.Bool
    75  
    76  	k.k8sResourceSynced.BlockWaitGroupToSyncResources(
    77  		k.stop,
    78  		swg,
    79  		func() bool { return synced.Load() },
    80  		apiGroup,
    81  	)
    82  	k.k8sAPIGroups.AddAPI(apiGroup)
    83  
    84  	ctx, cancel := context.WithCancel(context.Background())
    85  	events := k.resources.Endpoints.Events(ctx)
    86  	go func() {
    87  		for {
    88  			select {
    89  			case <-k.stop:
    90  				cancel()
    91  			case event, ok := <-events:
    92  				if !ok {
    93  					return
    94  				}
    95  				switch event.Kind {
    96  				case resource.Sync:
    97  					synced.Store(true)
    98  				case resource.Upsert:
    99  					k.k8sResourceSynced.SetEventTimestamp(apiGroup)
   100  					k.updateEndpoint(event.Object, swg)
   101  				case resource.Delete:
   102  					k.k8sResourceSynced.SetEventTimestamp(apiGroup)
   103  					k.k8sSvcCache.DeleteEndpoints(event.Object.EndpointSliceID, swg)
   104  				}
   105  				event.Done(nil)
   106  			}
   107  		}
   108  	}()
   109  }
   110  
// stopWatcher signals shutdown by closing the stop channel. The goroutine
// started by endpointsInit reacts by cancelling its event subscription.
// It must be called at most once: closing an already closed channel panics.
func (k *K8sEndpointsWatcher) stopWatcher() {
	close(k.stop)
}
   114  
// updateEndpoint fans an upserted Endpoints object out to its consumers: the
// service cache (together with swgEps), the BGP speaker, and the
// kube-apiserver ipcache labelling, in that order.
func (k *K8sEndpointsWatcher) updateEndpoint(eps *k8s.Endpoints, swgEps *lock.StoppableWaitGroup) {
	k.k8sSvcCache.UpdateEndpoints(eps, swgEps)
	k.bgpSpeakerManager.OnUpdateEndpoints(eps)
	// No-op unless eps belongs to the default/kubernetes service.
	k.addKubeAPIServerServiceEndpoints(eps)
}
   120  
   121  func (k *K8sEndpointsWatcher) addKubeAPIServerServiceEndpoints(eps *k8s.Endpoints) {
   122  	if eps == nil ||
   123  		eps.EndpointSliceID.ServiceID.Name != "kubernetes" ||
   124  		eps.EndpointSliceID.ServiceID.Namespace != "default" {
   125  		return
   126  	}
   127  	resource := ipcacheTypes.NewResourceID(
   128  		ipcacheTypes.ResourceKindEndpoint,
   129  		eps.ObjectMeta.GetNamespace(),
   130  		eps.ObjectMeta.GetName(),
   131  	)
   132  	desiredIPs := make(map[netip.Prefix]struct{})
   133  	for addrCluster := range eps.Backends {
   134  		addr := addrCluster.Addr()
   135  		desiredIPs[netip.PrefixFrom(addr, addr.BitLen())] = struct{}{}
   136  	}
   137  	k.handleKubeAPIServerServiceEPChanges(desiredIPs, resource)
   138  }
   139  
   140  // handleKubeAPIServerServiceEPChanges associates the set of 'desiredIPs' with
   141  // the 'reserved:kube-apiserver' label in the IPCache. This allows policy
   142  // selectors for the kube-apiserver entity to match these peers.
   143  //
   144  // Any IPs currently associated with the apiserver that are not part of
   145  // 'desiredIPs' will be disassociated from the apiserver following a call to
   146  // this function.
   147  //
   148  // The actual implementation of this logic down to the datapath is handled
   149  // asynchronously.
   150  func (k *K8sEndpointsWatcher) handleKubeAPIServerServiceEPChanges(desiredIPs map[netip.Prefix]struct{}, rid ipcacheTypes.ResourceID) {
   151  	src := source.KubeAPIServer
   152  
   153  	// We must perform a diff on the ipcache.identityMetadata map in order to
   154  	// figure out which IPs are stale and should be removed, before we inject
   155  	// new IPs into the ipcache. The reason is because kube-apiserver will
   156  	// constantly reconcile this specific object, even when it's been deleted;
   157  	// effectively, this means we can avoid listening for the delete event.
   158  	// Therefore, any changes to this specific object can be handled in a
   159  	// "flattened" manner, since the most up-to-date form of it will be an add
   160  	// or update event. The former is sent when Cilium is syncing with K8s and
   161  	// the latter is sent anytime after.
   162  	//
   163  	// For example:
   164  	//   * if a backend is removed or updated, then this will be in the form of
   165  	//     an update event.
   166  	//   * if the entire object is deleted, then it will quickly be recreated
   167  	//     and this will be in the form of an add event.
   168  	k.ipcache.RemoveLabelsExcluded(
   169  		labels.LabelKubeAPIServer,
   170  		desiredIPs,
   171  		rid,
   172  	)
   173  
   174  	for ip := range desiredIPs {
   175  		k.ipcache.UpsertLabels(ip, labels.LabelKubeAPIServer, src, rid)
   176  	}
   177  }