github.com/cilium/cilium@v1.16.2/pkg/bgpv1/manager/reconciler/service.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package reconciler
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"net/netip"
    10  	"slices"
    11  
    12  	"github.com/cilium/hive/cell"
    13  	"golang.org/x/exp/maps"
    14  	corev1 "k8s.io/api/core/v1"
    15  	"k8s.io/apimachinery/pkg/util/sets"
    16  
    17  	"github.com/cilium/cilium/pkg/bgpv1/manager/instance"
    18  	"github.com/cilium/cilium/pkg/bgpv1/manager/store"
    19  	"github.com/cilium/cilium/pkg/bgpv1/types"
    20  	"github.com/cilium/cilium/pkg/k8s"
    21  	v2alpha1api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
    22  	"github.com/cilium/cilium/pkg/k8s/resource"
    23  	slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
    24  	"github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels"
    25  	slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
    26  	ciliumslices "github.com/cilium/cilium/pkg/slices"
    27  )
    28  
    29  type LBServiceReconcilerOut struct {
    30  	cell.Out
    31  
    32  	Reconciler ConfigReconciler `group:"bgp-config-reconciler"`
    33  }
    34  
    35  type ServiceReconciler struct {
    36  	diffStore   store.DiffStore[*slim_corev1.Service]
    37  	epDiffStore store.DiffStore[*k8s.Endpoints]
    38  }
    39  
    40  // LBServiceReconcilerMetadata keeps a map of services to the respective advertised Paths
    41  type LBServiceReconcilerMetadata map[resource.Key][]*types.Path
    42  
    43  type localServices map[k8s.ServiceID]struct{}
    44  
    45  func NewServiceReconciler(diffStore store.DiffStore[*slim_corev1.Service], epDiffStore store.DiffStore[*k8s.Endpoints]) LBServiceReconcilerOut {
    46  	if diffStore == nil {
    47  		return LBServiceReconcilerOut{}
    48  	}
    49  
    50  	return LBServiceReconcilerOut{
    51  		Reconciler: &ServiceReconciler{
    52  			diffStore:   diffStore,
    53  			epDiffStore: epDiffStore,
    54  		},
    55  	}
    56  }
    57  
    58  func (r *ServiceReconciler) Name() string {
    59  	return "Service"
    60  }
    61  
    62  func (r *ServiceReconciler) Priority() int {
    63  	return 40
    64  }
    65  
    66  func (r *ServiceReconciler) Init(sc *instance.ServerWithConfig) error {
    67  	if sc == nil {
    68  		return fmt.Errorf("BUG: service reconciler initialization with nil ServerWithConfig")
    69  	}
    70  	r.diffStore.InitDiff(r.diffID(sc.ASN))
    71  	r.epDiffStore.InitDiff(r.diffID(sc.ASN))
    72  	return nil
    73  }
    74  
    75  func (r *ServiceReconciler) Cleanup(sc *instance.ServerWithConfig) {
    76  	if sc != nil {
    77  		r.diffStore.CleanupDiff(r.diffID(sc.ASN))
    78  		r.epDiffStore.CleanupDiff(r.diffID(sc.ASN))
    79  	}
    80  }
    81  
    82  func (r *ServiceReconciler) Reconcile(ctx context.Context, p ReconcileParams) error {
    83  	if p.CiliumNode == nil {
    84  		return fmt.Errorf("attempted service reconciliation with nil local CiliumNode")
    85  	}
    86  
    87  	ls, err := r.populateLocalServices(p.CiliumNode.Name)
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	if r.requiresFullReconciliation(p) {
    93  		return r.fullReconciliation(ctx, p.CurrentServer, p.DesiredConfig, ls)
    94  	}
    95  	return r.svcDiffReconciliation(ctx, p.CurrentServer, p.DesiredConfig, ls)
    96  }
    97  
    98  func (r *ServiceReconciler) getMetadata(sc *instance.ServerWithConfig) LBServiceReconcilerMetadata {
    99  	if _, found := sc.ReconcilerMetadata[r.Name()]; !found {
   100  		sc.ReconcilerMetadata[r.Name()] = make(LBServiceReconcilerMetadata)
   101  	}
   102  	return sc.ReconcilerMetadata[r.Name()].(LBServiceReconcilerMetadata)
   103  }
   104  
   105  func (r *ServiceReconciler) resolveSvcFromEndpoints(eps *k8s.Endpoints) (*slim_corev1.Service, bool, error) {
   106  	k := resource.Key{
   107  		Name:      eps.ServiceID.Name,
   108  		Namespace: eps.ServiceID.Namespace,
   109  	}
   110  	return r.diffStore.GetByKey(k)
   111  }
   112  
   113  // requiresFullReconciliation returns true if the desired config requires full reconciliation
   114  // (reconciliation of all services), false if partial (diff) reconciliation is sufficient.
   115  func (r *ServiceReconciler) requiresFullReconciliation(p ReconcileParams) bool {
   116  	var existingSelector *slim_metav1.LabelSelector
   117  	if p.CurrentServer != nil && p.CurrentServer.Config != nil {
   118  		existingSelector = p.CurrentServer.Config.ServiceSelector
   119  	} else {
   120  		return true // the first reconciliation should be always full
   121  	}
   122  	// If the existing selector was updated, went from nil to something or something to nil, we need to perform full
   123  	// reconciliation and check if every existing announcement's service still matches the selector.
   124  	return (existingSelector != nil && p.DesiredConfig.ServiceSelector != nil && !p.DesiredConfig.ServiceSelector.DeepEqual(existingSelector)) ||
   125  		((existingSelector == nil) != (p.DesiredConfig.ServiceSelector == nil))
   126  }
   127  
   128  // Populate locally available services used for externalTrafficPolicy=local handling
   129  func (r *ServiceReconciler) populateLocalServices(localNodeName string) (localServices, error) {
   130  	ls := make(localServices)
   131  
   132  	epList, err := r.epDiffStore.List()
   133  	if err != nil {
   134  		return nil, fmt.Errorf("failed to list endpoints from diffstore: %w", err)
   135  	}
   136  
   137  endpointsLoop:
   138  	for _, eps := range epList {
   139  		_, exists, err := r.resolveSvcFromEndpoints(eps)
   140  		if err != nil {
   141  			// Cannot resolve service from endpoints. We have nothing to do here.
   142  			continue
   143  		}
   144  
   145  		if !exists {
   146  			// No service associated with this endpoint. We're not interested in this.
   147  			continue
   148  		}
   149  
   150  		svcID := eps.ServiceID
   151  
   152  		for _, be := range eps.Backends {
   153  			if !be.Terminating && be.NodeName == localNodeName {
   154  				// At least one endpoint is available on this node. We
   155  				// can make unavailable to available.
   156  				if _, found := ls[svcID]; !found {
   157  					ls[svcID] = struct{}{}
   158  				}
   159  				continue endpointsLoop
   160  			}
   161  		}
   162  	}
   163  
   164  	return ls, nil
   165  }
   166  
   167  func hasLocalEndpoints(svc *slim_corev1.Service, ls localServices) bool {
   168  	_, found := ls[k8s.ServiceID{Name: svc.GetName(), Namespace: svc.GetNamespace()}]
   169  	return found
   170  }
   171  
   172  // fullReconciliation reconciles all services, this is a heavy operation due to the potential amount of services and
   173  // thus should be avoided if partial reconciliation is an option.
   174  func (r *ServiceReconciler) fullReconciliation(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, ls localServices) error {
   175  	toReconcile, toWithdraw, err := r.fullReconciliationServiceList(sc)
   176  	if err != nil {
   177  		return err
   178  	}
   179  	for _, svc := range toReconcile {
   180  		if err := r.reconcileService(ctx, sc, newc, svc, ls); err != nil {
   181  			return fmt.Errorf("failed to reconcile service %s/%s: %w", svc.Namespace, svc.Name, err)
   182  		}
   183  	}
   184  	for _, svc := range toWithdraw {
   185  		if err := r.withdrawService(ctx, sc, svc); err != nil {
   186  			return fmt.Errorf("failed to withdraw service %s/%s: %w", svc.Namespace, svc.Name, err)
   187  		}
   188  	}
   189  	return nil
   190  }
   191  
   192  // svcDiffReconciliation performs reconciliation, only on services which have been created, updated or deleted since
   193  // the last diff reconciliation.
   194  func (r *ServiceReconciler) svcDiffReconciliation(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, ls localServices) error {
   195  	toReconcile, toWithdraw, err := r.diffReconciliationServiceList(sc)
   196  	if err != nil {
   197  		return err
   198  	}
   199  	for _, svc := range toReconcile {
   200  		if err := r.reconcileService(ctx, sc, newc, svc, ls); err != nil {
   201  			return fmt.Errorf("failed to reconcile service %s/%s: %w", svc.Namespace, svc.Name, err)
   202  		}
   203  	}
   204  	// Loop over the deleted services
   205  	for _, svcKey := range toWithdraw {
   206  		if err := r.withdrawService(ctx, sc, svcKey); err != nil {
   207  			return fmt.Errorf("failed to withdraw service %s: %w", svcKey, err)
   208  		}
   209  	}
   210  	return nil
   211  }
   212  
   213  // fullReconciliationServiceList return a list of services to reconcile and to withdraw when performing
   214  // full service reconciliation.
   215  func (r *ServiceReconciler) fullReconciliationServiceList(sc *instance.ServerWithConfig) (toReconcile []*slim_corev1.Service, toWithdraw []resource.Key, err error) {
   216  	// Init diff in diffstores, so that it contains only changes since the last full reconciliation.
   217  	// Despite doing it in Init(), we still need this InitDiff to clean up the old diff when the instance is re-created
   218  	// by the preflight reconciler. Once Init() is called upon re-create by preflight, we can remove this.
   219  	r.diffStore.InitDiff(r.diffID(sc.ASN))
   220  	r.epDiffStore.InitDiff(r.diffID(sc.ASN))
   221  
   222  	// Loop over all existing announcements, find announcements for services which no longer exist
   223  	serviceAnnouncements := r.getMetadata(sc)
   224  	for svcKey := range serviceAnnouncements {
   225  		_, found, err := r.diffStore.GetByKey(svcKey)
   226  		if err != nil {
   227  			return nil, nil, fmt.Errorf("diffStore.GetByKey(): %w", err)
   228  		}
   229  		// if the service no longer exists, withdraw it
   230  		if !found {
   231  			toWithdraw = append(toWithdraw, svcKey)
   232  		}
   233  	}
   234  
   235  	// Reconcile all existing services
   236  	svcList, err := r.diffStore.List()
   237  	if err != nil {
   238  		return nil, nil, fmt.Errorf("failed to list services from diffstore: %w", err)
   239  	}
   240  	toReconcile = append(toReconcile, svcList...)
   241  
   242  	return toReconcile, toWithdraw, nil
   243  }
   244  
   245  // diffReconciliationServiceList returns a list of services to reconcile and to withdraw when
   246  // performing partial (diff) service reconciliation.
   247  func (r *ServiceReconciler) diffReconciliationServiceList(sc *instance.ServerWithConfig) (toReconcile []*slim_corev1.Service, toWithdraw []resource.Key, err error) {
   248  	upserted, deleted, err := r.diffStore.Diff(r.diffID(sc.ASN))
   249  	if err != nil {
   250  		return nil, nil, fmt.Errorf("svc store diff: %w", err)
   251  	}
   252  
   253  	// For externalTrafficPolicy=local, we need to take care of
   254  	// the endpoint changes in addition to the service changes.
   255  	// Take a diff of the endpoints and get affected services.
   256  	// We don't handle service deletion here since we only see
   257  	// the key, we cannot resolve associated service, so we have
   258  	// nothing to do.
   259  	epsUpserted, _, err := r.epDiffStore.Diff(r.diffID(sc.ASN))
   260  	if err != nil {
   261  		return nil, nil, fmt.Errorf("endpoints store diff: %w", err)
   262  	}
   263  
   264  	for _, eps := range epsUpserted {
   265  		svc, exists, err := r.resolveSvcFromEndpoints(eps)
   266  		if err != nil {
   267  			// Cannot resolve service from endpoints. We have nothing to do here.
   268  			continue
   269  		}
   270  
   271  		if !exists {
   272  			// No service associated with this endpoint. We're not interested in this.
   273  			continue
   274  		}
   275  
   276  		// We only need Endpoints tracking for externalTrafficPolicy=Local or internalTrafficPolicy=Local.
   277  		if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal ||
   278  			(svc.Spec.InternalTrafficPolicy != nil && *svc.Spec.InternalTrafficPolicy == slim_corev1.ServiceInternalTrafficPolicyLocal) {
   279  			upserted = append(upserted, svc)
   280  		}
   281  	}
   282  
   283  	// We may have duplicated services that changes happened for both of
   284  	// service and associated endpoints.
   285  	deduped := ciliumslices.UniqueFunc(
   286  		upserted,
   287  		func(i int) resource.Key {
   288  			return resource.Key{
   289  				Name:      upserted[i].GetName(),
   290  				Namespace: upserted[i].GetNamespace(),
   291  			}
   292  		},
   293  	)
   294  
   295  	return deduped, deleted, nil
   296  }
   297  
   298  // svcDesiredRoutes determines which, if any routes should be announced for the given service. This determines the
   299  // desired state.
   300  func (r *ServiceReconciler) svcDesiredRoutes(newc *v2alpha1api.CiliumBGPVirtualRouter, svc *slim_corev1.Service, ls localServices) ([]netip.Prefix, error) {
   301  	if newc.ServiceSelector == nil {
   302  		// If the vRouter has no service selector, there are no desired routes.
   303  		return nil, nil
   304  	}
   305  
   306  	// The vRouter has a service selector, so determine the desired routes.
   307  	svcSelector, err := slim_metav1.LabelSelectorAsSelector(newc.ServiceSelector)
   308  	if err != nil {
   309  		return nil, fmt.Errorf("labelSelectorAsSelector: %w", err)
   310  	}
   311  
   312  	// Ignore non matching services.
   313  	if !svcSelector.Matches(serviceLabelSet(svc)) {
   314  		return nil, nil
   315  	}
   316  
   317  	var desiredRoutes []netip.Prefix
   318  	// Loop over the service advertisements and determine the desired routes.
   319  	for _, svcAdv := range newc.ServiceAdvertisements {
   320  		switch svcAdv {
   321  		case v2alpha1api.BGPLoadBalancerIPAddr:
   322  			desiredRoutes = append(desiredRoutes, r.lbSvcDesiredRoutes(svc, ls)...)
   323  		case v2alpha1api.BGPClusterIPAddr:
   324  			desiredRoutes = append(desiredRoutes, r.clusterIPDesiredRoutes(svc, ls)...)
   325  		case v2alpha1api.BGPExternalIPAddr:
   326  			desiredRoutes = append(desiredRoutes, r.externalIPDesiredRoutes(svc, ls)...)
   327  		}
   328  	}
   329  
   330  	return desiredRoutes, err
   331  }
   332  
   333  func (r *ServiceReconciler) externalIPDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix {
   334  	var desiredRoutes []netip.Prefix
   335  	// Ignore externalTrafficPolicy == Local && no local endpoints.
   336  	if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal &&
   337  		!hasLocalEndpoints(svc, ls) {
   338  		return desiredRoutes
   339  	}
   340  	for _, extIP := range svc.Spec.ExternalIPs {
   341  		if extIP == "" {
   342  			continue
   343  		}
   344  		addr, err := netip.ParseAddr(extIP)
   345  		if err != nil {
   346  			continue
   347  		}
   348  		desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen()))
   349  	}
   350  	return desiredRoutes
   351  }
   352  
   353  func (r *ServiceReconciler) clusterIPDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix {
   354  	var desiredRoutes []netip.Prefix
   355  	// Ignore internalTrafficPolicy == Local && no local endpoints.
   356  	if svc.Spec.InternalTrafficPolicy != nil && *svc.Spec.InternalTrafficPolicy == slim_corev1.ServiceInternalTrafficPolicyLocal &&
   357  		!hasLocalEndpoints(svc, ls) {
   358  		return desiredRoutes
   359  	}
   360  	if svc.Spec.ClusterIP == "" || len(svc.Spec.ClusterIPs) == 0 || svc.Spec.ClusterIP == corev1.ClusterIPNone {
   361  		return desiredRoutes
   362  	}
   363  	ips := sets.New[string]()
   364  	if svc.Spec.ClusterIP != "" {
   365  		ips.Insert(svc.Spec.ClusterIP)
   366  	}
   367  	for _, clusterIP := range svc.Spec.ClusterIPs {
   368  		if clusterIP == "" || clusterIP == corev1.ClusterIPNone {
   369  			continue
   370  		}
   371  		ips.Insert(clusterIP)
   372  	}
   373  	for _, ip := range sets.List(ips) {
   374  		addr, err := netip.ParseAddr(ip)
   375  		if err != nil {
   376  			continue
   377  		}
   378  		desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen()))
   379  	}
   380  	return desiredRoutes
   381  }
   382  
   383  func (r *ServiceReconciler) lbSvcDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix {
   384  	var desiredRoutes []netip.Prefix
   385  	if svc.Spec.Type != slim_corev1.ServiceTypeLoadBalancer {
   386  		return desiredRoutes
   387  	}
   388  	// Ignore externalTrafficPolicy == Local && no local endpoints.
   389  	if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal &&
   390  		!hasLocalEndpoints(svc, ls) {
   391  		return desiredRoutes
   392  	}
   393  	// Ignore service managed by an unsupported LB class.
   394  	if svc.Spec.LoadBalancerClass != nil && *svc.Spec.LoadBalancerClass != v2alpha1api.BGPLoadBalancerClass {
   395  		// The service is managed by a different LB class.
   396  		return desiredRoutes
   397  	}
   398  	for _, ingress := range svc.Status.LoadBalancer.Ingress {
   399  		if ingress.IP == "" {
   400  			continue
   401  		}
   402  		addr, err := netip.ParseAddr(ingress.IP)
   403  		if err != nil {
   404  			continue
   405  		}
   406  		desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen()))
   407  	}
   408  	return desiredRoutes
   409  }
   410  
   411  // reconcileService gets the desired routes of a given service and makes sure that is what is being announced.
   412  func (r *ServiceReconciler) reconcileService(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, svc *slim_corev1.Service, ls localServices) error {
   413  
   414  	desiredRoutes, err := r.svcDesiredRoutes(newc, svc, ls)
   415  	if err != nil {
   416  		return fmt.Errorf("failed to retrieve svc desired routes: %w", err)
   417  	}
   418  	return r.reconcileServiceRoutes(ctx, sc, svc, desiredRoutes)
   419  }
   420  
   421  // reconcileServiceRoutes ensures that desired routes of a given service are announced,
   422  // adding missing announcements or withdrawing unwanted ones.
   423  func (r *ServiceReconciler) reconcileServiceRoutes(ctx context.Context, sc *instance.ServerWithConfig, svc *slim_corev1.Service, desiredRoutes []netip.Prefix) error {
   424  	serviceAnnouncements := r.getMetadata(sc)
   425  	svcKey := resource.NewKey(svc)
   426  
   427  	for _, desiredCidr := range desiredRoutes {
   428  		// If this route has already been announced, don't add it again
   429  		if slices.IndexFunc(serviceAnnouncements[svcKey], func(existing *types.Path) bool {
   430  			return desiredCidr.String() == existing.NLRI.String()
   431  		}) != -1 {
   432  			continue
   433  		}
   434  
   435  		// Advertise the new cidr
   436  		advertPathResp, err := sc.Server.AdvertisePath(ctx, types.PathRequest{
   437  			Path: types.NewPathForPrefix(desiredCidr),
   438  		})
   439  		if err != nil {
   440  			return fmt.Errorf("failed to advertise service route %v: %w", desiredCidr, err)
   441  		}
   442  		serviceAnnouncements[svcKey] = append(serviceAnnouncements[svcKey], advertPathResp.Path)
   443  	}
   444  
   445  	// Loop over announcements in reverse order so we can delete entries without effecting iteration.
   446  	for i := len(serviceAnnouncements[svcKey]) - 1; i >= 0; i-- {
   447  		announcement := serviceAnnouncements[svcKey][i]
   448  		// If the announcement is within the list of desired routes, don't remove it
   449  		if slices.IndexFunc(desiredRoutes, func(existing netip.Prefix) bool {
   450  			return existing.String() == announcement.NLRI.String()
   451  		}) != -1 {
   452  			continue
   453  		}
   454  
   455  		if err := sc.Server.WithdrawPath(ctx, types.PathRequest{Path: announcement}); err != nil {
   456  			return fmt.Errorf("failed to withdraw service route %s: %w", announcement.NLRI, err)
   457  		}
   458  
   459  		// Delete announcement from slice
   460  		serviceAnnouncements[svcKey] = slices.Delete(serviceAnnouncements[svcKey], i, i+1)
   461  	}
   462  	return nil
   463  }
   464  
   465  // withdrawService removes all announcements for the given service
   466  func (r *ServiceReconciler) withdrawService(ctx context.Context, sc *instance.ServerWithConfig, key resource.Key) error {
   467  	serviceAnnouncements := r.getMetadata(sc)
   468  	advertisements := serviceAnnouncements[key]
   469  	// Loop in reverse order so we can delete without effect to the iteration.
   470  	for i := len(advertisements) - 1; i >= 0; i-- {
   471  		advertisement := advertisements[i]
   472  		if err := sc.Server.WithdrawPath(ctx, types.PathRequest{Path: advertisement}); err != nil {
   473  			// Persist remaining advertisements
   474  			serviceAnnouncements[key] = advertisements
   475  			return fmt.Errorf("failed to withdraw deleted service route: %v: %w", advertisement.NLRI, err)
   476  		}
   477  
   478  		// Delete the advertisement after each withdraw in case we error half way through
   479  		advertisements = slices.Delete(advertisements, i, i+1)
   480  	}
   481  
   482  	// If all were withdrawn without error, we can delete the whole svc from the map
   483  	delete(serviceAnnouncements, key)
   484  
   485  	return nil
   486  }
   487  
   488  func (r *ServiceReconciler) diffID(asn uint32) string {
   489  	return fmt.Sprintf("%s-%d", r.Name(), asn)
   490  }
   491  
   492  func serviceLabelSet(svc *slim_corev1.Service) labels.Labels {
   493  	svcLabels := maps.Clone(svc.Labels)
   494  	if svcLabels == nil {
   495  		svcLabels = make(map[string]string)
   496  	}
   497  	svcLabels["io.kubernetes.service.name"] = svc.Name
   498  	svcLabels["io.kubernetes.service.namespace"] = svc.Namespace
   499  	return labels.Set(svcLabels)
   500  }