
     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     4  package l2announcer
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"net/http"
    12  	"net/netip"
    13  	"regexp"
    14  	"slices"
    15  	"strings"
    17  	""
    18  	""
    19  	""
    20  	""
    21  	""
    22  	apierrors ""
    23  	metav1 ""
    24  	""
    25  	""
    26  	""
    28  	daemon_k8s ""
    29  	""
    30  	""
    31  	v2 ""
    32  	cilium_api_v2alpha1 ""
    33  	k8sClient ""
    34  	""
    35  	slim_corev1 ""
    36  	""
    37  	slim_meta_v1 ""
    38  	""
    39  	""
    40  	""
    41  )
// Cell is the "l2-announcer" ("L2 Announcer") module. It provides the L2Announcer constructor
// (NewL2Announcer) and the constructor of the CiliumL2AnnouncementPolicy resource
// (l2AnnouncementPolicyResource).
var Cell = cell.Module(
	"l2-announcer",
	"L2 Announcer",
	cell.Provide(NewL2Announcer),
	cell.Provide(l2AnnouncementPolicyResource),
)
    51  func l2AnnouncementPolicyResource(lc cell.Lifecycle, cs k8sClient.Clientset) (resource.Resource[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy], error) {
    52  	if !cs.IsEnabled() {
    53  		return nil, nil
    54  	}
    55  	lw := utils.ListerWatcherFromTyped(
    56  		cs.CiliumV2alpha1().CiliumL2AnnouncementPolicies(),
    57  	)
    58  	return resource.New[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy](lc, lw, resource.WithMetric("CiliumL2AnnouncementPolicy")), nil
    59  }
// l2AnnouncerParams is the set of dependencies handed to NewL2Announcer. It embeds cell.In.
//
// As used in this file: DaemonConfig supplies the feature toggle, the lease timing options and the namespace in
// which leases are kept; Clientset is used for Lease objects and for patching policy status; Services,
// L2AnnouncementPolicy and LocalNodeResource are the event sources consumed by the run loop; Devices and StateDB
// are queried for the selected devices; L2AnnounceTable is the table whose entries are recalculated per service;
// JobGroup hosts the background jobs.
type l2AnnouncerParams struct {
	cell.In
	// Lifecycle and Health are not referenced in the visible part of this file; Logger is used for warnings.
	Lifecycle cell.Lifecycle
	Logger    logrus.FieldLogger
	Health    cell.Health
	// Configuration, API access, event sources, state tables and the job group.
	DaemonConfig         *option.DaemonConfig
	Clientset            k8sClient.Clientset
	Services             resource.Resource[*slim_corev1.Service]
	L2AnnouncementPolicy resource.Resource[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]
	LocalNodeResource    daemon_k8s.LocalCiliumNodeResource
	L2AnnounceTable      statedb.RWTable[*tables.L2AnnounceEntry]
	Devices              statedb.Table[*tables.Device]
	StateDB              *statedb.DB
	JobGroup             job.Group
}
// L2Announcer takes all L2 announcement policies and filters down to those that match the labels of the local node. It
// then searches all services that match the selectors of the policies. For each service, we attempt to take a lease,
// the holder node persists all IPs and netdev combinations selected by the policy to the L2AnnounceTable. Datapath
// components consume them and handle traffic for the IP+netdev entries.
type L2Announcer struct {
	// params holds the injected dependencies.
	params l2AnnouncerParams
	// svcStore and policyStore are obtained at the start of run; localNode is set by upsertLocalNode.
	svcStore    resource.Store[*slim_corev1.Service]
	policyStore resource.Store[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]
	localNode   *v2.CiliumNode
	// scopedGroup is the "leader-election" scope of the job group; per-service leader election jobs are added to it.
	scopedGroup job.ScopedGroup
	// leaderChannel is a buffered channel of leader election events which the run loop receives from.
	// devicesUpdatedSig is created with a buffer of one in NewL2Announcer; it is not otherwise used in the
	// visible part of this file.
	leaderChannel     chan leaderElectionEvent
	devicesUpdatedSig chan struct{}
	// selectedPolicies matching the current node.
	selectedPolicies map[resource.Key]*selectedPolicy
	// Services which are selected by one or more policies for which we thus want to participate in leader election.
	// Indexed by service key.
	selectedServices map[resource.Key]*selectedService
	// A list of devices which can be matched by the policies
	devices []string
}
   104  func NewL2Announcer(params l2AnnouncerParams) *L2Announcer {
   105  	// These values were picked because it seemed right, change if necessary
   106  	const leaderElectionBufferSize = 16
   107  	announcer := &L2Announcer{
   108  		params:            params,
   109  		selectedServices:  make(map[resource.Key]*selectedService),
   110  		selectedPolicies:  make(map[resource.Key]*selectedPolicy),
   111  		leaderChannel:     make(chan leaderElectionEvent, leaderElectionBufferSize),
   112  		devicesUpdatedSig: make(chan struct{}, 1),
   113  	}
   115  	// Can't operate or GC if client set is disabled
   116  	if !params.Clientset.IsEnabled() {
   117  		return announcer
   118  	}
   120  	announcer.scopedGroup = announcer.params.JobGroup.Scoped("leader-election")
   122  	if !params.DaemonConfig.EnableL2Announcements {
   123  		// If the L2 announcement feature is disabled, garbage collect any leases from previous runs when the feature
   124  		// might have been active. Just once, not on a timer.
   125  		announcer.params.JobGroup.Add(job.OneShot("l2-announcer lease-gc", announcer.leaseGC))
   126  		return announcer
   127  	}
   129  	announcer.params.JobGroup.Add(job.OneShot("l2-announcer run",
   130  	announcer.params.JobGroup.Add(job.Timer("l2-announcer lease-gc", func(ctx context.Context) error {
   131  		return announcer.leaseGC(ctx, nil)
   132  	}, time.Minute))
   134  	return announcer
   135  }
// run is the announcer's main event loop.
//
// It first obtains the service and policy stores, subscribes to service, policy and local node events and
// records the names of the currently selected devices. It then blocks until a local node object has been
// observed and afterwards dispatches events until ctx is cancelled or one of the resource event channels is
// closed. Errors from individual event handlers are logged as warnings and do not stop the loop; only a failure
// to obtain one of the stores is returned as an error. The health argument is not used by this function.
func (l2a *L2Announcer) run(ctx context.Context, health cell.Health) error {
	var err error
	l2a.svcStore, err = l2a.params.Services.Store(ctx)
	if err != nil {
		return fmt.Errorf("get service store: %w", err)
	}
	l2a.policyStore, err = l2a.params.L2AnnouncementPolicy.Store(ctx)
	if err != nil {
		return fmt.Errorf("get policy store: %w", err)
	}
	// Event streams consumed by the loops below.
	svcChan := l2a.params.Services.Events(ctx)
	policyChan := l2a.params.L2AnnouncementPolicy.Events(ctx)
	localNodeChan := l2a.params.LocalNodeResource.Events(ctx)
	// Initial device snapshot; watchDevices is received from in the main loop to trigger a re-query.
	devices, watchDevices := tables.SelectedDevices(l2a.params.Devices, l2a.params.StateDB.ReadTxn())
	l2a.devices = tables.DeviceNames(devices)
	// We have to first have a local node before we can start processing other events.
	for {
		event, more := <-localNodeChan
		// resource closed, shutting down
		if !more {
			return nil
		}
		if err := l2a.processLocalNodeEvent(ctx, event); err != nil {
			l2a.params.Logger.WithError(err).Warn("Error processing local node event")
		}
		// Keep waiting until an event has actually set the local node.
		if l2a.localNode != nil {
			break
		}
	}
loop:
	for {
		select {
		case <-ctx.Done():
			break loop
		case event, more := <-svcChan:
			// resource closed, shutting down
			if !more {
				break loop
			}
			if err := l2a.processSvcEvent(event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing service event")
			}
		case event, more := <-policyChan:
			// resource closed, shutting down
			if !more {
				break loop
			}
			if err := l2a.processPolicyEvent(ctx, event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing policy event")
			}
		case event, more := <-localNodeChan:
			// resource closed, shutting down
			if !more {
				break loop
			}
			if err := l2a.processLocalNodeEvent(ctx, event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing local node event")
			}
		case event := <-l2a.leaderChannel:
			if err := l2a.processLeaderEvent(event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing leader event")
			}
		case <-watchDevices:
			// Re-query the devices and obtain a fresh watch channel for the next change.
			devices, watchDevices = tables.SelectedDevices(l2a.params.Devices, l2a.params.StateDB.ReadTxn())
			deviceNames := tables.DeviceNames(devices)
			// Nothing to do when the list of device names is unchanged.
			if slices.Equal(l2a.devices, deviceNames) {
				continue
			}
			l2a.devices = deviceNames
			if err := l2a.processDevicesChanged(ctx); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing devices changed signal")
			}
		}
	}
	return nil
}
   230  // Called periodically to garbage collect any leases which are no longer held by any agent.
   231  // This is needed since agents do not track leases for services that we no longer select.
   232  func (l2a *L2Announcer) leaseGC(ctx context.Context, health cell.Health) error {
   233  	leaseClient := l2a.params.Clientset.CoordinationV1().Leases(l2a.leaseNamespace())
   234  	list, err := leaseClient.List(ctx, metav1.ListOptions{})
   235  	if err != nil {
   236  		var statusErr *apierrors.StatusError
   237  		if errors.As(err, &statusErr) && statusErr.Status().Code == http.StatusForbidden {
   238  			// LeaseGC can't check if L2 announcements were enabled before this run.
   239  			// So we assume. If the feature was never enabled, we get this forbidden error since
   240  			// the cluster role for the cilium agent will not have permission, this is expected.
   241  			return nil
   242  		}
   244  		return fmt.Errorf("leaseClient.List: %w", err)
   245  	}
   247  	for _, lease := range list.Items {
   248  		if !strings.HasPrefix(lease.Name, leasePrefix) {
   249  			continue
   250  		}
   252  		if lease.Spec.HolderIdentity != nil && *lease.Spec.HolderIdentity != "" {
   253  			continue
   254  		}
   256  		err = leaseClient.Delete(ctx, lease.Name, metav1.DeleteOptions{})
   257  		if err != nil {
   258  			return fmt.Errorf("leaseClient.Delete(%s): %w", lease.Name, err)
   259  		}
   260  	}
   262  	return nil
   263  }
   265  func (l2a *L2Announcer) processDevicesChanged(ctx context.Context) error {
   266  	var errs error
   268  	// Upsert every known policy which will re-evaluate device matching
   269  	for _, selectedPolicy := range l2a.selectedPolicies {
   270  		if err := l2a.upsertPolicy(ctx, selectedPolicy.policy); err != nil {
   271  			errs = errors.Join(errs, fmt.Errorf("upsert policy: %w", err))
   272  		}
   273  	}
   275  	return errs
   276  }
   278  func (l2a *L2Announcer) processPolicyEvent(ctx context.Context, event resource.Event[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]) error {
   279  	var err error
   280  	switch event.Kind {
   281  	case resource.Upsert:
   282  		err = l2a.upsertPolicy(ctx, event.Object)
   283  		if err != nil {
   284  			err = fmt.Errorf("upsert policy: %w", err)
   285  		}
   287  	case resource.Delete:
   288  		err = l2a.delPolicy(event.Key)
   289  		if err != nil {
   290  			err = fmt.Errorf("delete policy: %w", err)
   291  		}
   293  	case resource.Sync:
   294  	}
   296  	// if `err` is not nil, the event will be retried by the resource.
   297  	event.Done(err)
   298  	return err
   299  }
   301  func (l2a *L2Announcer) upsertSvc(svc *slim_corev1.Service) error {
   302  	key := serviceKey(svc)
   304  	// Ignore services if there is no noExternal or LB IP assigned.
   305  	noExternal := svc.Spec.ExternalIPs == nil
   306  	noLB := true
   307  	for _, v := range svc.Status.LoadBalancer.Ingress {
   308  		if v.IP != "" {
   309  			noLB = false
   310  			break
   311  		}
   312  	}
   313  	if noExternal && noLB {
   314  		return l2a.delSvc(key)
   315  	}
   317  	// Ignore services managed by an unsupported load balancer class.
   318  	if svc.Spec.LoadBalancerClass != nil &&
   319  		*svc.Spec.LoadBalancerClass != cilium_api_v2alpha1.L2AnnounceLoadBalancerClass {
   320  		return l2a.delSvc(key)
   321  	}
   323  	ss, found := l2a.selectedServices[key]
   324  	if found {
   325  		// Update service object, labels or IPs may have changed
   326  		ss.svc = svc
   328  		// Since labels may have changed, remove all matching policies, re-match against all known policies.
   329  		ss.byPolicies = nil
   330  		for policyKey, selectedPolicy := range l2a.selectedPolicies {
   331  			if selectedPolicy.serviceSelector.Matches(svcAndMetaLabels(svc)) {
   332  				// Policy IP type and Service IP type must match
   333  				if (selectedPolicy.policy.Spec.ExternalIPs && !noExternal) ||
   334  					(selectedPolicy.policy.Spec.LoadBalancerIPs && !noLB) {
   335  					ss.byPolicies = append(ss.byPolicies, policyKey)
   336  				}
   337  			}
   338  		}
   340  		// If no policies match anymore, delete the service
   341  		if len(ss.byPolicies) == 0 {
   342  			// gcOrphanedService deletes when a service has no policies, which is the case here.
   343  			// It also stops any lease subscription and reconciles the output table.
   344  			l2a.gcOrphanedService(ss)
   345  			return nil
   346  		}
   348  		// Since IPs may have changed, re-calculate its entries in the output table, if we are leader
   349  		err := l2a.recalculateL2EntriesTableEntries(ss)
   350  		if err != nil {
   351  			return fmt.Errorf("recalculateL2EntriesTableEntries: %w", err)
   352  		}
   354  		return nil
   355  	}
   357  	// Service is not selected, check if any policies match.
   358  	var matchingPolicies []resource.Key
   359  	for policyKey, selectedPolicy := range l2a.selectedPolicies {
   360  		if selectedPolicy.serviceSelector.Matches(svcAndMetaLabels(svc)) {
   361  			// Policy IP type and Service IP type must match
   362  			if (selectedPolicy.policy.Spec.ExternalIPs && !noExternal) ||
   363  				(selectedPolicy.policy.Spec.LoadBalancerIPs && !noLB) {
   364  				matchingPolicies = append(matchingPolicies, policyKey)
   365  			}
   366  		}
   367  	}
   369  	// Add the services to list of selected services if at least 1 policy matches it.
   370  	if len(matchingPolicies) >= 1 {
   371  		l2a.addSelectedService(svc, matchingPolicies)
   372  	}
   374  	return nil
   375  }
   377  func (l2a *L2Announcer) delSvc(key resource.Key) error {
   378  	ss, found := l2a.selectedServices[key]
   379  	if !found {
   380  		return nil
   381  	}
   383  	// gcOrphanedService will delete the service if it has no policies that match, so remove the policy references
   384  	// and call gcOrphanedService. It will remove the service, stop leader election for it and reconcile the output
   385  	// table if we were leader for the service.
   386  	ss.byPolicies = nil
   387  	err := l2a.gcOrphanedService(ss)
   388  	if err != nil {
   389  		return fmt.Errorf("gcOrphanedService: %w", err)
   390  	}
   392  	return nil
   393  }
   395  func (l2a *L2Announcer) processSvcEvent(event resource.Event[*slim_corev1.Service]) error {
   396  	var err error
   397  	switch event.Kind {
   398  	case resource.Upsert:
   399  		err = l2a.upsertSvc(event.Object)
   400  		if err != nil {
   401  			err = fmt.Errorf("upsert service: %w", err)
   402  		}
   404  	case resource.Delete:
   405  		err = l2a.delSvc(event.Key)
   406  		if err != nil {
   407  			err = fmt.Errorf("delete service: %w", err)
   408  		}
   410  	case resource.Sync:
   411  	}
   413  	// if `err` is not nil, this will cause the resource to retry the event.
   414  	event.Done(err)
   415  	return err
   416  }
   418  func policyKey(policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy) resource.Key {
   419  	return resource.Key{Name: policy.Name}
   420  }
   422  func serviceKey(svc *slim_corev1.Service) resource.Key {
   423  	return resource.Key{Namespace: svc.Namespace, Name: svc.Name}
   424  }
// upsertPolicy (re-)evaluates a policy against the local node, the known devices and all services in the
// service store.
//
// Steps, in order:
//  1. Drop every reference to this policy from the selected services.
//  2. Evaluate the node selector; if it does not match the local node's labels the policy is deleted via
//     delPolicy and the function returns.
//  3. Compute the devices selected by the policy's interface regexes (all devices when none are given).
//  4. Build the service selector (everything when none is given).
//  5. Store the policy in selectedPolicies and attach it to every matching service with a suitable IP type,
//     adding services that were not selected before.
//  6. Garbage collect services that are left without any policy.
//
// An invalid node selector, interface regex or service selector is recorded in the policy status and returned
// as an error. Failures to update the policy status are only logged.
//
// NOTE(review): on those early error returns the references removed in step 1 are not restored and
// gcOrphanedServices is not run — confirm this is intended.
func (l2a *L2Announcer) upsertPolicy(ctx context.Context, policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy) error {
	key := policyKey(policy)
	// Remove all references to the old policy, since the new version might not match the service anymore.
	for _, ss := range l2a.selectedServices {
		idx := slices.Index(ss.byPolicies, key)
		if idx != -1 {
			ss.byPolicies = slices.Delete(ss.byPolicies, idx, idx+1)
		}
	}
	if policy.Spec.NodeSelector != nil {
		nodeselector, err := slim_meta_v1.LabelSelectorAsSelector(policy.Spec.NodeSelector)
		if err != nil {
			// Report the invalid selector on the policy; a failure to report is only logged.
			if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", err); err2 != nil {
				l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
			}
			return fmt.Errorf("make node selector: %w", err)
		}
		// The selector is valid, clear any error status if it was set before.
		if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", nil); err != nil {
			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
		}
		// The new policy does not match the node selector
		if !nodeselector.Matches(labels.Set(l2a.localNode.Labels)) {
			err = l2a.delPolicy(key)
			if err != nil {
				return fmt.Errorf("del policy: %w", err)
			}
			return nil
		}
	} else {
		// Clear any error status if it was set before
		if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", nil); err != nil {
			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
		}
	}
	// If no interface regexes are given, all devices match. Otherwise only devices matching the policy
	// will be selected.
	var selectedDevices []string
	if len(policy.Spec.Interfaces) == 0 {
		selectedDevices = l2a.devices
	} else {
		for _, strRegex := range policy.Spec.Interfaces {
			regex, err := regexp.Compile(strRegex)
			if err != nil {
				if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-interface-regex", err); err2 != nil {
					l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
				}
				return fmt.Errorf("policy compile interface regex: %w", err)
			}
			for _, device := range l2a.devices {
				// A device matched by an earlier regex is not added twice.
				if slices.Contains(selectedDevices, device) {
					continue
				}
				if regex.MatchString(device) {
					selectedDevices = append(selectedDevices, device)
				}
			}
		}
	}
	// Clear any error status if it was set before.
	if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-interface-regex", nil); err != nil {
		l2a.params.Logger.WithError(err).Warn("updating policy status failed")
	}
	// If no selector is specified, all services match.
	serviceSelector := labels.Everything()
	if policy.Spec.ServiceSelector != nil {
		var err error
		serviceSelector, err = slim_meta_v1.LabelSelectorAsSelector(policy.Spec.ServiceSelector)
		if err != nil {
			if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-service-selector", err); err2 != nil {
				l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
			}
			return fmt.Errorf("make service selector: %w", err)
		}
	}
	// Clear any error status if it exists
	if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-service-selector", nil); err != nil {
		l2a.params.Logger.WithError(err).Warn("updating policy status failed")
	}
	// Record (or replace) the policy together with its evaluated selector and devices.
	l2a.selectedPolicies[key] = &selectedPolicy{
		policy:          policy,
		serviceSelector: serviceSelector,
		selectedDevices: selectedDevices,
	}
	// Check all services, if they match the policy, mark the selected service as matching this policy.
	// Or add to the selected services if it was not there already.
	for _, svc := range l2a.svcStore.List() {
		if !serviceSelector.Matches(svcAndMetaLabels(svc)) {
			continue
		}
		// Ignore services if there is no external or LB IP assigned.
		noExternal := svc.Spec.ExternalIPs == nil
		noLB := true
		for _, v := range svc.Status.LoadBalancer.Ingress {
			if v.IP != "" {
				noLB = false
				break
			}
		}
		if noExternal && noLB {
			continue
		}
		// Policy IP type and service IP type must match.
		if !((policy.Spec.ExternalIPs && !noExternal) ||
			(policy.Spec.LoadBalancerIPs && !noLB)) {
			continue
		}
		ss, found := l2a.selectedServices[serviceKey(svc)]
		if found {
			if slices.Index(ss.byPolicies, key) == -1 {
				ss.byPolicies = append(ss.byPolicies, key)
			}
			// recalculate in case the policy update causes neighbor proxy entries to be generated differently
			if err := l2a.recalculateL2EntriesTableEntries(ss); err != nil {
				return fmt.Errorf("recalculateNeighborProxyTableEntries: %w", err)
			}
			continue
		}
		// Not selected before: start tracking the service with this policy as its only reference.
		l2a.addSelectedService(svc, []resource.Key{key})
	}
	// Services which lost their last policy reference in the first step are removed here.
	err := l2a.gcOrphanedServices()
	if err != nil {
		return fmt.Errorf("gcOrphanedServices: %w", err)
	}
	return nil
}
const (
	// ciliumFieldManager is the value set as FieldManager in the patch options when the announcer patches the
	// status of a policy.
	ciliumFieldManager = "cilium-agent-l2-announcer"
)
   575  // updatePolicyStatus updates the policy status annotation of the given type, it is called every time an aspect of the
   576  // policy has been checked. If `err` is nil, and no conditions exist, no action is taken. If `err` contains an actual
   577  // error a condition is added or updated and if a condition exists and `err` == nil follows the condition is marked
   578  // false
   579  func (l2a *L2Announcer) updatePolicyStatus(
   580  	ctx context.Context,
   581  	policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy,
   582  	typ string,
   583  	err error,
   584  ) error {
   585  	// Find an existing condition of the given type
   586  	idx := slices.IndexFunc(policy.Status.Conditions, func(c metav1.Condition) bool {
   587  		return c.Type == typ
   588  	})
   590  	var cond *metav1.Condition
   591  	// If no condition of this type exists
   592  	if idx < 0 {
   593  		// If the update call was to clear an error, no action has to happen
   594  		if err == nil {
   595  			return nil
   596  		}
   598  		policy.Status.Conditions = append(policy.Status.Conditions, metav1.Condition{})
   599  		idx = len(policy.Status.Conditions) - 1
   600  	}
   601  	cond = &policy.Status.Conditions[idx]
   603  	cond.Type = typ
   604  	cond.Status = metav1.ConditionTrue
   605  	if err == nil {
   606  		cond.Status = metav1.ConditionFalse
   607  	}
   608  	cond.LastTransitionTime = metav1.Now()
   609  	cond.ObservedGeneration = policy.GetGeneration()
   610  	if err == nil {
   611  		cond.Message = ""
   612  	} else {
   613  		cond.Message = err.Error()
   614  	}
   615  	cond.Reason = "error"
   617  	policyClient := l2a.params.Clientset.CiliumV2alpha1().CiliumL2AnnouncementPolicies()
   619  	replacePolicyStatus := []k8s.JSONPatch{
   620  		{
   621  			OP:    "replace",
   622  			Path:  "/status",
   623  			Value: policy.Status,
   624  		},
   625  	}
   627  	createStatusPatch, err := json.Marshal(replacePolicyStatus)
   628  	if err != nil {
   629  		return fmt.Errorf("json.Marshal(%v) failed: %w", replacePolicyStatus, err)
   630  	}
   632  	_, err = policyClient.Patch(ctx, policy.Name,
   633  		types.JSONPatchType, createStatusPatch, metav1.PatchOptions{
   634  			FieldManager: ciliumFieldManager,
   635  		}, "status")
   637  	return err
   638  }
   640  func (l2a *L2Announcer) delPolicy(key resource.Key) error {
   641  	for _, ss := range l2a.selectedServices {
   642  		idx := slices.Index(ss.byPolicies, key)
   643  		if idx != -1 {
   644  			ss.byPolicies = slices.Delete(ss.byPolicies, idx, idx+1)
   645  		}
   646  	}
   648  	delete(l2a.selectedPolicies, key)
   650  	err := l2a.gcOrphanedServices()
   651  	if err != nil {
   652  		return fmt.Errorf("gcOrphanedServices: %w", err)
   653  	}
   655  	return nil
   656  }
   658  // The leaderelection library enforces sane timer values, this function verifiers that the user input follows the rules
   659  // and overwrites to sane defaults if they don't.
   660  func (l2a *L2Announcer) leaseTimings() (leaseDuration, renewDeadline, retryPeriod time.Duration) {
   661  	leaseDuration = l2a.params.DaemonConfig.L2AnnouncerLeaseDuration
   662  	renewDeadline = l2a.params.DaemonConfig.L2AnnouncerRenewDeadline
   663  	retryPeriod = l2a.params.DaemonConfig.L2AnnouncerRetryPeriod
   665  	log := l2a.params.Logger
   667  	if leaseDuration < 1*time.Second {
   668  		log.WithFields(logrus.Fields{
   669  			"leaseDuration": leaseDuration,
   670  		}).Warnf(
   671  			"--%s must be greater than 1s, defaulting to 1s",
   672  			option.L2AnnouncerLeaseDuration,
   673  		)
   674  		leaseDuration = time.Second
   675  	}
   677  	if renewDeadline < 1 {
   678  		log.WithFields(logrus.Fields{
   679  			"renewDeadline": renewDeadline,
   680  		}).Warnf(
   681  			"--%s must be greater than 1ns, defaulting to 1s",
   682  			option.L2AnnouncerRenewDeadline,
   683  		)
   684  		renewDeadline = time.Second
   685  	}
   687  	if retryPeriod < 1 {
   688  		log.WithFields(logrus.Fields{
   689  			"retryPeriod": retryPeriod,
   690  		}).Warnf(
   691  			"--%s must be greater than 1ns, defaulting to 200ms",
   692  			option.L2AnnouncerRetryPeriod,
   693  		)
   694  		retryPeriod = 200 * time.Millisecond
   695  	}
   697  	if leaseDuration <= renewDeadline {
   698  		log.WithFields(logrus.Fields{
   699  			"leaseDuration": leaseDuration,
   700  			"renewDeadline": renewDeadline,
   701  		}).Warnf(
   702  			"--%s must be greater than --%s, defaulting to a 2/1 ratio",
   703  			option.L2AnnouncerLeaseDuration,
   704  			option.L2AnnouncerRenewDeadline,
   705  		)
   706  		renewDeadline = leaseDuration / 2
   707  	}
   709  	if renewDeadline <= time.Duration(leaderelection.JitterFactor*float64(retryPeriod)) {
   710  		log.WithFields(logrus.Fields{
   711  			"renewDeadline": renewDeadline,
   712  			"retryPeriod":   retryPeriod,
   713  		}).Warnf(
   714  			"--%s must be greater than --%s * %.2f, defaulting to --%s / 2",
   715  			option.L2AnnouncerRenewDeadline,
   716  			option.L2AnnouncerRetryPeriod,
   717  			leaderelection.JitterFactor,
   718  			option.L2AnnouncerRetryPeriod,
   719  		)
   720  		retryPeriod = renewDeadline / 2
   721  	}
   723  	return leaseDuration, renewDeadline, retryPeriod
   724  }
   726  func (l2a *L2Announcer) addSelectedService(svc *slim_corev1.Service, byPolicies []resource.Key) {
   727  	leaseDuration, renewDeadline, retryPeriod := l2a.leaseTimings()
   728  	ss := &selectedService{
   729  		svc:           svc,
   730  		byPolicies:    byPolicies,
   731  		lock:          l2a.newLeaseLock(svc),
   732  		done:          make(chan struct{}),
   733  		leaderChannel: l2a.leaderChannel,
   734  		leaseDuration: leaseDuration,
   735  		renewDeadline: renewDeadline,
   736  		retryPeriod:   retryPeriod,
   737  	}
   739  	l2a.selectedServices[serviceKey(svc)] = ss
   741  	// kick off leader election job
   742  	l2a.scopedGroup.Add(job.OneShot(
   743  		fmt.Sprintf("leader-election/%s/%s", svc.Namespace, svc.Name),
   744  		ss.serviceLeaderElection),
   745  	)
   746  }
   748  func (l2a *L2Announcer) leaseNamespace() string {
   749  	ns := l2a.params.DaemonConfig.K8sNamespace
   750  	// If due to any reason the CILIUM_K8S_NAMESPACE is not set we assume the operator
   751  	// to be in default namespace.
   752  	if ns == "" {
   753  		ns = metav1.NamespaceDefault
   754  	}
   756  	return ns
   757  }
// leasePrefix is the name prefix of every lease created by newLeaseLock; leaseGC only considers leases whose
// name starts with it.
const leasePrefix = "cilium-l2announce"
   761  func (l2a *L2Announcer) newLeaseLock(svc *slim_corev1.Service) *resourcelock.LeaseLock {
   762  	return &resourcelock.LeaseLock{
   763  		LeaseMeta: metav1.ObjectMeta{
   764  			Namespace: l2a.leaseNamespace(),
   765  			Name:      fmt.Sprintf("%s-%s-%s", leasePrefix, svc.Namespace, svc.Name),
   766  		},
   767  		Client: l2a.params.Clientset.CoordinationV1(),
   768  		LockConfig: resourcelock.ResourceLockConfig{
   769  			Identity: l2a.localNode.Name,
   770  		},
   771  	}
   772  }
   774  // Check all selected services, delete services which are no longer selected by any of the policies.
   775  func (l2a *L2Announcer) gcOrphanedServices() error {
   776  	for _, ss := range l2a.selectedServices {
   777  		err := l2a.gcOrphanedService(ss)
   778  		if err != nil {
   779  			return fmt.Errorf("gcOrphanedService: %w", err)
   780  		}
   781  	}
   783  	return nil
   784  }
   786  func (l2a *L2Announcer) gcOrphanedService(ss *selectedService) error {
   787  	// Only GC policies that have been orphaned (all policies that created it has gone away)
   788  	if len(ss.byPolicies) > 0 {
   789  		return nil
   790  	}
   792  	// Stop leader election routine
   793  	ss.stop()
   795  	// Recalculation will remove all entries since we stopped the leader election.
   796  	if err := l2a.recalculateL2EntriesTableEntries(ss); err != nil {
   797  		return fmt.Errorf("recalculateNeighborProxyTableEntries: %w", err)
   798  	}
   800  	// Remove service from selected services
   801  	delete(l2a.selectedServices, serviceKey(ss.svc))
   802  	return nil
   803  }
   805  func (l2a *L2Announcer) processLocalNodeEvent(ctx context.Context, event resource.Event[*v2.CiliumNode]) error {
   806  	var err error
   807  	if event.Kind == resource.Upsert {
   808  		err = l2a.upsertLocalNode(ctx, event.Object)
   809  		if err != nil {
   810  			err = fmt.Errorf("upsert local node: %w", err)
   811  		}
   812  	}
   814  	event.Done(err)
   815  	return err
   816  }
   818  func (l2a *L2Announcer) upsertLocalNode(ctx context.Context, localNode *v2.CiliumNode) error {
   819  	// If the label set did not change, nothing to do.
   820  	if l2a.localNode != nil && labels.Equals(l2a.localNode.Labels, labels.Set(localNode.Labels)) {
   821  		return nil
   822  	}
   824  	l2a.localNode = localNode
   826  	// Delete any policies that no longer match the new label set
   827  	var errs error
   828  	for key, selectedPolicy := range l2a.selectedPolicies {
   829  		var nodeselector labels.Selector
   830  		if selectedPolicy.policy.Spec.NodeSelector == nil {
   831  			nodeselector = labels.Everything()
   832  		} else {
   833  			var err error
   834  			nodeselector, err = slim_meta_v1.LabelSelectorAsSelector(selectedPolicy.policy.Spec.NodeSelector)
   835  			if err != nil {
   836  				if err2 := l2a.updatePolicyStatus(ctx, selectedPolicy.policy, "io.cilium/bad-node-selector", err); err2 != nil {
   837  					l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
   838  				}
   839  				return fmt.Errorf("make node selector: %w", err)
   840  			}
   841  		}
   842  		if err := l2a.updatePolicyStatus(ctx, selectedPolicy.policy, "io.cilium/bad-node-selector", nil); err != nil {
   843  			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
   844  		}
   846  		if nodeselector.Matches(labels.Set(l2a.localNode.Labels)) {
   847  			continue
   848  		}
   850  		err := l2a.delPolicy(key)
   851  		if err != nil {
   852  			errors.Join(errs, fmt.Errorf("delete policy: %w", err))
   853  			continue
   854  		}
   855  	}
   857  	// Upsert all policies, the upsert function checks if they match the new label set
   858  	for _, policy := range l2a.policyStore.List() {
   859  		err := l2a.upsertPolicy(ctx, policy)
   860  		if err != nil {
   861  			errors.Join(errs, fmt.Errorf("upsert policy: %w", err))
   862  			continue
   863  		}
   864  	}
   866  	return errs
   867  }
   869  func (l2a *L2Announcer) processLeaderEvent(event leaderElectionEvent) error {
   870  	event.selectedService.currentlyLeader = event.typ == leaderElectionLeading
   871  	err := l2a.recalculateL2EntriesTableEntries(event.selectedService)
   872  	if err != nil {
   873  		return fmt.Errorf("recalculateNeighborProxyTableEntries: %w", err)
   874  	}
   876  	return nil
   877  }
   879  func (l2a *L2Announcer) recalculateL2EntriesTableEntries(ss *selectedService) error {
   880  	tbl := l2a.params.L2AnnounceTable
   881  	txn := l2a.params.StateDB.WriteTxn(tbl)
   882  	defer txn.Abort()
   884  	svcKey := serviceKey(ss.svc)
   886  	entriesIter := tbl.List(txn, tables.L2AnnounceOriginIndex.Query(svcKey))
   888  	// If we are not the leader, we should not have any proxy entries for the service.
   889  	if !ss.currentlyLeader {
   890  		// Remove origin from entries, and delete if no origins left
   891  		err := statedb.ProcessEach(entriesIter, func(e *tables.L2AnnounceEntry, _ uint64) error {
   892  			// Copy, since modifying objects directly is not allowed.
   893  			e = e.DeepCopy()
   895  			idx := slices.Index(e.Origins, svcKey)
   896  			if idx != -1 {
   897  				e.Origins = slices.Delete(e.Origins, idx, idx+1)
   898  			}
   900  			if len(e.Origins) == 0 {
   901  				_, _, err := tbl.Delete(txn, e)
   902  				if err != nil {
   903  					return fmt.Errorf("delete in table: %w", err)
   904  				}
   905  				return nil
   906  			}
   908  			_, _, err := tbl.Insert(txn, e)
   909  			if err != nil {
   910  				return fmt.Errorf("update in table: %w", err)
   911  			}
   912  			return nil
   913  		})
   914  		if err != nil {
   915  			return fmt.Errorf("failed to modify desired state: %w", err)
   916  		}
   918  		txn.Commit()
   920  		return nil
   921  	}
   923  	desiredEntries := l2a.desiredEntries(ss)
   924  	satisfiedEntries := make(map[string]bool)
   925  	for key := range desiredEntries {
   926  		satisfiedEntries[key] = false
   927  	}
   929  	// Loop over existing entries, delete undesired entries
   930  	err := statedb.ProcessEach(entriesIter, func(e *tables.L2AnnounceEntry, _ uint64) error {
   931  		key := fmt.Sprintf("%s/%s", e.IP, e.NetworkInterface)
   933  		_, desired := desiredEntries[key]
   934  		if desired {
   935  			// Iterator only contains entries which already have the origin of the current svc.
   936  			// So no need to add it in the second step.
   937  			satisfiedEntries[key] = true
   938  			return nil
   939  		}
   941  		// Entry is undesired.
   943  		// Copy, since modifying objects directly is not allowed.
   944  		e = e.DeepCopy()
   946  		idx := slices.Index(e.Origins, svcKey)
   947  		if idx != -1 {
   948  			e.Origins = slices.Delete(e.Origins, idx, idx+1)
   949  		}
   951  		if len(e.Origins) == 0 {
   952  			// Delete, if no services want this IP + NetDev anymore
   953  			tbl.Delete(txn, e)
   954  			return nil
   955  		}
   957  		_, _, err := tbl.Insert(txn, e)
   958  		if err != nil {
   959  			return fmt.Errorf("update in table: %w", err)
   960  		}
   961  		return nil
   962  	})
   963  	if err != nil {
   964  		return fmt.Errorf("failed to modify desired state: %w", err)
   965  	}
   967  	// loop over the desired states, add any that are missing
   968  	for key, satisfied := range satisfiedEntries {
   969  		if satisfied {
   970  			continue
   971  		}
   973  		entry := desiredEntries[key]
   974  		existing, _, _ := tbl.Get(txn, tables.L2AnnounceIDIndex.Query(tables.L2AnnounceKey{
   975  			IP:               entry.IP,
   976  			NetworkInterface: entry.NetworkInterface,
   977  		}))
   978  		if err != nil {
   979  			return fmt.Errorf("first: %w", err)
   980  		}
   982  		if existing == nil {
   983  			existing = &tables.L2AnnounceEntry{
   984  				L2AnnounceKey: tables.L2AnnounceKey{
   985  					IP:               entry.IP,
   986  					NetworkInterface: entry.NetworkInterface,
   987  				},
   988  			}
   989  		}
   991  		// Add our new origin to the existing origins, or if existing is nil (no entry existed), nothing will change.
   992  		entry.Origins = append(existing.Origins, entry.Origins...)
   994  		// Insert or update
   995  		_, _, err = tbl.Insert(txn, entry)
   996  		if err != nil {
   997  			return fmt.Errorf("insert new: %w", err)
   998  		}
   999  		continue
  1000  	}
  1002  	txn.Commit()
  1004  	return nil
  1005  }
  1007  func (l2a *L2Announcer) desiredEntries(ss *selectedService) map[string]*tables.L2AnnounceEntry {
  1008  	entries := make(map[string]*tables.L2AnnounceEntry)
  1010  	for _, policyKey := range ss.byPolicies {
  1011  		selectedPolicy := l2a.selectedPolicies[policyKey]
  1013  		var IPs []netip.Addr
  1014  		if selectedPolicy.policy.Spec.LoadBalancerIPs {
  1015  			for _, ingress := range ss.svc.Status.LoadBalancer.Ingress {
  1016  				if ingress.IP == "" {
  1017  					continue
  1018  				}
  1020  				if addr, err := netip.ParseAddr(ingress.IP); err == nil {
  1021  					IPs = append(IPs, addr)
  1022  				}
  1023  			}
  1024  		}
  1026  		if selectedPolicy.policy.Spec.ExternalIPs {
  1027  			for _, externalIP := range ss.svc.Spec.ExternalIPs {
  1028  				if addr, err := netip.ParseAddr(externalIP); err == nil {
  1029  					IPs = append(IPs, addr)
  1030  				}
  1031  			}
  1032  		}
  1034  		for _, ip := range IPs {
  1035  			for _, iface := range selectedPolicy.selectedDevices {
  1036  				key := fmt.Sprintf("%s/%s", ip.String(), iface)
  1037  				entry, found := entries[key]
  1038  				if !found {
  1039  					entry = &tables.L2AnnounceEntry{
  1040  						L2AnnounceKey: tables.L2AnnounceKey{
  1041  							IP:               ip,
  1042  							NetworkInterface: iface,
  1043  						},
  1044  						Origins: []resource.Key{serviceKey(ss.svc)},
  1045  					}
  1046  				}
  1047  				entries[key] = entry
  1048  			}
  1049  		}
  1050  	}
  1052  	return entries
  1053  }
  1055  const (
  1056  	serviceNamespaceLabel = "io.kubernetes.service.namespace"
  1057  	serviceNameLabel      = ""
  1058  )
  1060  func svcAndMetaLabels(svc *slim_corev1.Service) labels.Set {
  1061  	labels := maps.Clone(svc.GetLabels())
  1062  	if labels == nil {
  1063  		labels = make(map[string]string)
  1064  	}
  1066  	labels[serviceNamespaceLabel] = svc.Namespace
  1067  	labels[serviceNameLabel] = svc.Name
  1068  	return labels
  1069  }
// selectedService holds the per-service state kept for a service: the service
// itself, the policies that select it, and everything needed to run and stop
// the leader election for it.
type selectedService struct {
	// The last known version of the service
	svc *slim_corev1.Service
	// The policies which select this service.
	byPolicies []resource.Key

	// lease parameters, handed to the leader election config as LeaseDuration,
	// RenewDeadline and RetryPeriod by serviceLeaderElection.
	leaseDuration time.Duration
	renewDeadline time.Duration
	retryPeriod   time.Duration

	// The lock object used to perform leader election for this selected service.
	// currentlyLeader records whether this node is currently the leader for the
	// service; it is set by processLeaderEvent and cleared by stop.
	// leaderChannel is where serviceLeaderElection sends an event whenever
	// leading starts or stops.
	lock            *resourcelock.LeaseLock
	currentlyLeader bool
	leaderChannel   chan leaderElectionEvent

	// Leader election goroutine lifetime management.
	// ctx and cancel are created by serviceLeaderElection from the context it is
	// given; done is closed when serviceLeaderElection returns. stop cancels
	// the context and then waits on done.
	ctx    context.Context
	cancel context.CancelFunc
	done   chan struct{}
}
  1093  func (ss *selectedService) serviceLeaderElection(ctx context.Context, health cell.Health) error {
  1094  	defer close(ss.done)
  1096  	ss.ctx, ss.cancel = context.WithCancel(ctx)
  1098  	for {
  1099  		select {
  1100  		case <-ss.ctx.Done():
  1101  			return nil
  1102  		default:
  1103  			leaderelection.RunOrDie(ss.ctx, leaderelection.LeaderElectionConfig{
  1104  				Name:            ss.lock.LeaseMeta.Name,
  1105  				Lock:            ss.lock,
  1106  				ReleaseOnCancel: true,
  1108  				LeaseDuration: ss.leaseDuration,
  1109  				RenewDeadline: ss.renewDeadline,
  1110  				RetryPeriod:   ss.retryPeriod,
  1112  				Callbacks: leaderelection.LeaderCallbacks{
  1113  					OnStartedLeading: func(ctx context.Context) {
  1114  						ss.leaderChannel <- leaderElectionEvent{
  1115  							typ:             leaderElectionLeading,
  1116  							selectedService: ss,
  1117  						}
  1118  					},
  1119  					OnStoppedLeading: func() {
  1120  						ss.leaderChannel <- leaderElectionEvent{
  1121  							typ:             leaderElectionStoppedLeading,
  1122  							selectedService: ss,
  1123  						}
  1124  					},
  1125  				},
  1126  			})
  1127  		}
  1128  	}
  1129  }
  1131  func (ss *selectedService) stop() {
  1132  	if ss.cancel != nil {
  1133  		ss.cancel()
  1134  		<-ss.done
  1135  		ss.currentlyLeader = false
  1136  	}
  1137  }
// leaderElectionEventType tells what happened in a leaderElectionEvent.
type leaderElectionEventType int

const (
	// leaderElectionLeading is sent from the OnStartedLeading callback;
	// processLeaderEvent marks the service as currently leading for it.
	leaderElectionLeading leaderElectionEventType = iota
	// leaderElectionStoppedLeading is sent from the OnStoppedLeading callback;
	// processLeaderEvent marks the service as not leading for it.
	leaderElectionStoppedLeading
)
// leaderElectionEvent is the message sent on a selected service's
// leaderChannel when its leader election starts or stops leading.
// typ says which of the two happened and selectedService points at the
// service the election belongs to.
type leaderElectionEvent struct {
	typ             leaderElectionEventType
	selectedService *selectedService
}
// selectedPolicy bundles a CiliumL2AnnouncementPolicy with data derived from
// it: a compiled service selector and the devices it currently selects.
type selectedPolicy struct {
	// The policy object itself.
	policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy
	// pre-compiled service selector
	serviceSelector labels.Selector
	// a cached list of network devices selected by this policy based on the regular expressions in the policy
	// and the latest known list of devices.
	selectedDevices []string
}