github.com/cilium/cilium@v1.16.2/pkg/egressgateway/manager.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package egressgateway
     5  
     6  import (
     7  	"cmp"
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"net/netip"
    12  	"slices"
    13  	"strings"
    14  	"sync"
    15  	"sync/atomic"
    16  
    17  	"github.com/cilium/hive/cell"
    18  	"github.com/sirupsen/logrus"
    19  	"github.com/spf13/pflag"
    20  	"k8s.io/client-go/util/workqueue"
    21  
    22  	"github.com/cilium/cilium/pkg/datapath/linux/config/defines"
    23  	"github.com/cilium/cilium/pkg/datapath/linux/sysctl"
    24  	"github.com/cilium/cilium/pkg/datapath/tables"
    25  	"github.com/cilium/cilium/pkg/datapath/tunnel"
    26  	"github.com/cilium/cilium/pkg/identity"
    27  	identityCache "github.com/cilium/cilium/pkg/identity/cache"
    28  	cilium_api_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    29  	"github.com/cilium/cilium/pkg/k8s/resource"
    30  	k8sTypes "github.com/cilium/cilium/pkg/k8s/types"
    31  	"github.com/cilium/cilium/pkg/labels"
    32  	"github.com/cilium/cilium/pkg/lock"
    33  	"github.com/cilium/cilium/pkg/logging"
    34  	"github.com/cilium/cilium/pkg/logging/logfields"
    35  	"github.com/cilium/cilium/pkg/maps/egressmap"
    36  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    37  	"github.com/cilium/cilium/pkg/option"
    38  	"github.com/cilium/cilium/pkg/time"
    39  	"github.com/cilium/cilium/pkg/trigger"
    40  )
    41  
var (
	// log is the package-level logger, scoped to the egressgateway subsystem.
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "egressgateway")
	// GatewayNotFoundIPv4 is a special IP value used as gatewayIP in the BPF policy
	// map to indicate no gateway was found for the given policy
	GatewayNotFoundIPv4 = netip.IPv4Unspecified()
	// ExcludedCIDRIPv4 is a special IP value used as gatewayIP in the BPF policy map
	// to indicate the entry is for an excluded CIDR and should skip egress gateway
	ExcludedCIDRIPv4 = netip.MustParseAddr("0.0.0.1")
	// EgressIPNotFoundIPv4 is a special IP value used as egressIP in the BPF policy map
	// to indicate no egressIP was found for the given policy
	EgressIPNotFoundIPv4 = netip.IPv4Unspecified()
)
    54  
// Cell provides a [Manager] for consumption with hive.
// It registers the cell's config flags, the manager constructor, and the
// CiliumEgressGatewayPolicy resource used to feed the manager.
var Cell = cell.Module(
	"egressgateway",
	"Egress Gateway allows originating traffic from specific IPv4 addresses",
	cell.Config(defaultConfig),
	cell.Provide(NewEgressGatewayManager),
	cell.Provide(newPolicyResource),
)
    63  
// eventType is a bitmask of the kinds of events received by the manager
// since the last reconciliation pass; see Manager.eventsBitmap.
type eventType int

const (
	// NOTE(review): eventNone is 1<<0 (not 0), so it occupies a real bit;
	// it is never set anywhere in this file.
	eventNone = eventType(1 << iota)
	eventK8sSyncDone
	eventAddPolicy
	eventDeletePolicy
	eventUpdateEndpoint
	eventDeleteEndpoint
)
    74  
// Config holds the user-configurable settings of the egress gateway cell.
type Config struct {
	// Default amount of time between triggers of egress gateway state
	// reconciliations are invoked
	EgressGatewayReconciliationTriggerInterval time.Duration
}

// defaultConfig is the configuration used when no flag overrides it.
var defaultConfig = Config{
	EgressGatewayReconciliationTriggerInterval: 1 * time.Second,
}

// Flags registers the cell's command-line flags; hive maps the flag values
// back onto Config.
func (def Config) Flags(flags *pflag.FlagSet) {
	flags.Duration("egress-gateway-reconciliation-trigger-interval", def.EgressGatewayReconciliationTriggerInterval, "Time between triggers of egress gateway state reconciliations")
}
    88  
// The egressgateway manager stores the internal data tracking the node, policy,
// endpoint, and lease mappings. It also hooks up all the callbacks to update
// egress bpf policy map accordingly.
//
// All mutable fields are protected by the embedded mutex; methods with the
// "Locked" suffix expect the caller to already hold it.
type Manager struct {
	lock.Mutex

	// allCachesSynced is true when all k8s objects we depend on have had
	// their initial state synced.
	allCachesSynced bool

	// nodes stores nodes sorted by their name. The entries are sorted
	// to ensure consistent gateway selection across all agents.
	nodes []nodeTypes.Node

	// policies allows reading policy CRD from k8s.
	policies resource.Resource[*Policy]

	// nodesResource allows reading node CRD from k8s.
	ciliumNodes resource.Resource[*cilium_api_v2.CiliumNode]

	// endpoints allows reading endpoint CRD from k8s.
	endpoints resource.Resource[*k8sTypes.CiliumEndpoint]

	// policyConfigs stores policy configs indexed by policyID
	policyConfigs map[policyID]*PolicyConfig

	// policyConfigsBySourceIP stores slices of policy configs indexed by
	// the policies' source/endpoint IPs
	policyConfigsBySourceIP map[string][]*PolicyConfig

	// epDataStore stores endpointId to endpoint metadata mapping
	epDataStore map[endpointID]*endpointMetadata

	// identityAllocator is used to fetch identity labels for endpoint updates
	identityAllocator identityCache.IdentityAllocator

	// policyMap communicates the active policies to the datapath.
	policyMap egressmap.PolicyMap

	// reconciliationTriggerInterval is the amount of time between triggers
	// of reconciliations are invoked
	reconciliationTriggerInterval time.Duration

	// eventsBitmap is a bitmap that tracks which type of events has been
	// received by the manager (e.g. node added or policy removed) since the
	// last invocation of the reconciliation logic
	eventsBitmap eventType

	// reconciliationTrigger is the trigger used to reconcile the state of
	// the node with the desired egress gateway state.
	// The trigger is used to batch multiple updates together
	reconciliationTrigger *trigger.Trigger

	// reconciliationEventsCount keeps track of how many reconciliation
	// events have occurred
	reconciliationEventsCount atomic.Uint64

	// sysctl applies kernel settings; used to relax rp_filter on gateway
	// interfaces (see relaxRPFilter).
	sysctl sysctl.Sysctl
}
   148  
// Params contains the dependencies hive injects into NewEgressGatewayManager.
type Params struct {
	cell.In

	Config            Config
	DaemonConfig      *option.DaemonConfig
	IdentityAllocator identityCache.IdentityAllocator
	PolicyMap         egressmap.PolicyMap
	Policies          resource.Resource[*Policy]
	Nodes             resource.Resource[*cilium_api_v2.CiliumNode]
	Endpoints         resource.Resource[*k8sTypes.CiliumEndpoint]
	Sysctl            sysctl.Sysctl

	Lifecycle cell.Lifecycle
}
   163  
   164  func NewEgressGatewayManager(p Params) (out struct {
   165  	cell.Out
   166  
   167  	*Manager
   168  	defines.NodeOut
   169  	tunnel.EnablerOut
   170  }, err error) {
   171  	dcfg := p.DaemonConfig
   172  
   173  	if !dcfg.EnableIPv4EgressGateway {
   174  		return out, nil
   175  	}
   176  
   177  	if dcfg.IdentityAllocationMode == option.IdentityAllocationModeKVstore {
   178  		return out, errors.New("egress gateway is not supported in KV store identity allocation mode")
   179  	}
   180  
   181  	if dcfg.EnableHighScaleIPcache {
   182  		return out, errors.New("egress gateway is not supported in high scale IPcache mode")
   183  	}
   184  
   185  	if dcfg.EnableCiliumEndpointSlice {
   186  		return out, errors.New("egress gateway is not supported in combination with the CiliumEndpointSlice feature")
   187  	}
   188  
   189  	if !dcfg.EnableIPv4Masquerade || !dcfg.EnableBPFMasquerade {
   190  		return out, fmt.Errorf("egress gateway requires --%s=\"true\" and --%s=\"true\"", option.EnableIPv4Masquerade, option.EnableBPFMasquerade)
   191  	}
   192  
   193  	out.Manager, err = newEgressGatewayManager(p)
   194  	if err != nil {
   195  		return out, err
   196  	}
   197  
   198  	out.NodeDefines = map[string]string{
   199  		"ENABLE_EGRESS_GATEWAY": "1",
   200  	}
   201  
   202  	out.EnablerOut = tunnel.NewEnabler(true)
   203  
   204  	return out, nil
   205  }
   206  
   207  func newEgressGatewayManager(p Params) (*Manager, error) {
   208  	manager := &Manager{
   209  		policyConfigs:                 make(map[policyID]*PolicyConfig),
   210  		policyConfigsBySourceIP:       make(map[string][]*PolicyConfig),
   211  		epDataStore:                   make(map[endpointID]*endpointMetadata),
   212  		identityAllocator:             p.IdentityAllocator,
   213  		reconciliationTriggerInterval: p.Config.EgressGatewayReconciliationTriggerInterval,
   214  		policyMap:                     p.PolicyMap,
   215  		policies:                      p.Policies,
   216  		ciliumNodes:                   p.Nodes,
   217  		endpoints:                     p.Endpoints,
   218  		sysctl:                        p.Sysctl,
   219  	}
   220  
   221  	t, err := trigger.NewTrigger(trigger.Parameters{
   222  		Name:        "egress_gateway_reconciliation",
   223  		MinInterval: p.Config.EgressGatewayReconciliationTriggerInterval,
   224  		TriggerFunc: func(reasons []string) {
   225  			reason := strings.Join(reasons, ", ")
   226  			log.WithField(logfields.Reason, reason).Debug("reconciliation triggered")
   227  
   228  			manager.Lock()
   229  			defer manager.Unlock()
   230  
   231  			manager.reconcileLocked()
   232  		},
   233  	})
   234  	if err != nil {
   235  		return nil, err
   236  	}
   237  
   238  	manager.reconciliationTrigger = t
   239  
   240  	var wg sync.WaitGroup
   241  
   242  	ctx, cancel := context.WithCancel(context.Background())
   243  	p.Lifecycle.Append(cell.Hook{
   244  		OnStart: func(hc cell.HookContext) error {
   245  			wg.Add(1)
   246  			go func() {
   247  				defer wg.Done()
   248  				manager.processEvents(ctx)
   249  			}()
   250  
   251  			return nil
   252  		},
   253  		OnStop: func(hc cell.HookContext) error {
   254  			cancel()
   255  
   256  			wg.Wait()
   257  			return nil
   258  		},
   259  	})
   260  
   261  	return manager, nil
   262  }
   263  
   264  func (manager *Manager) setEventBitmap(events ...eventType) {
   265  	for _, e := range events {
   266  		manager.eventsBitmap |= e
   267  	}
   268  }
   269  
   270  func (manager *Manager) eventBitmapIsSet(events ...eventType) bool {
   271  	for _, e := range events {
   272  		if manager.eventsBitmap&e != 0 {
   273  			return true
   274  		}
   275  	}
   276  
   277  	return false
   278  }
   279  
   280  // getIdentityLabels waits for the global identities to be populated to the cache,
   281  // then looks up identity by ID from the cached identity allocator and return its labels.
   282  func (manager *Manager) getIdentityLabels(securityIdentity uint32) (labels.Labels, error) {
   283  	identityCtx, cancel := context.WithTimeout(context.Background(), option.Config.KVstoreConnectivityTimeout)
   284  	defer cancel()
   285  	if err := manager.identityAllocator.WaitForInitialGlobalIdentities(identityCtx); err != nil {
   286  		return nil, fmt.Errorf("failed to wait for initial global identities: %w", err)
   287  	}
   288  
   289  	identity := manager.identityAllocator.LookupIdentityByID(identityCtx, identity.NumericIdentity(securityIdentity))
   290  	if identity == nil {
   291  		return nil, fmt.Errorf("identity %d not found", securityIdentity)
   292  	}
   293  	return identity.Labels, nil
   294  }
   295  
// processEvents is the manager's event loop: it consumes policy, node and
// endpoint events from k8s until ctx is canceled, and fires the first
// reconciliation once all three resources have completed their initial sync.
// (It runs in a goroutine started by the lifecycle hook; it does not spawn
// one itself.)
func (manager *Manager) processEvents(ctx context.Context) {
	var policySync, nodeSync, endpointSync bool
	// maybeTriggerReconcile marks the caches as synced and kicks off the
	// initial reconciliation — only once, and only after all three resource
	// streams have delivered their Sync event.
	maybeTriggerReconcile := func() {
		if !policySync || !nodeSync || !endpointSync {
			return
		}

		manager.Lock()
		defer manager.Unlock()

		if manager.allCachesSynced {
			return
		}

		manager.allCachesSynced = true
		manager.setEventBitmap(eventK8sSyncDone)
		manager.reconciliationTrigger.TriggerWithReason("k8s sync done")
	}

	// here we try to mimic the same exponential backoff retry logic used by
	// the identity allocator, where the minimum retry timeout is set to 20
	// milliseconds and the max number of attempts is 16 (so 20ms * 2^16 ==
	// ~20 minutes); the per-item delay is capped at 20 minutes
	endpointsRateLimit := workqueue.NewItemExponentialFailureRateLimiter(time.Millisecond*20, time.Minute*20)

	policyEvents := manager.policies.Events(ctx)
	nodeEvents := manager.ciliumNodes.Events(ctx)
	endpointEvents := manager.endpoints.Events(ctx, resource.WithRateLimiter(endpointsRateLimit))

	// Sync events are acknowledged here; all other events are acknowledged
	// by their respective handler (possibly with an error, which makes the
	// resource layer retry/rate-limit the item).
	for {
		select {
		case <-ctx.Done():
			return

		case event := <-policyEvents:
			if event.Kind == resource.Sync {
				policySync = true
				maybeTriggerReconcile()
				event.Done(nil)
			} else {
				manager.handlePolicyEvent(event)
			}

		case event := <-nodeEvents:
			if event.Kind == resource.Sync {
				nodeSync = true
				maybeTriggerReconcile()
				event.Done(nil)
			} else {
				manager.handleNodeEvent(event)
			}

		case event := <-endpointEvents:
			if event.Kind == resource.Sync {
				endpointSync = true
				maybeTriggerReconcile()
				event.Done(nil)
			} else {
				manager.handleEndpointEvent(event)
			}
		}
	}
}
   361  
   362  func (manager *Manager) handlePolicyEvent(event resource.Event[*Policy]) {
   363  	switch event.Kind {
   364  	case resource.Upsert:
   365  		err := manager.onAddEgressPolicy(event.Object)
   366  		event.Done(err)
   367  	case resource.Delete:
   368  		manager.onDeleteEgressPolicy(event.Object)
   369  		event.Done(nil)
   370  	}
   371  }
   372  
   373  // Event handlers
   374  
   375  // onAddEgressPolicy parses the given policy config, and updates internal state
   376  // with the config fields.
   377  func (manager *Manager) onAddEgressPolicy(policy *Policy) error {
   378  	logger := log.WithField(logfields.CiliumEgressGatewayPolicyName, policy.Name)
   379  
   380  	config, err := ParseCEGP(policy)
   381  	if err != nil {
   382  		logger.WithError(err).Warn("Failed to parse CiliumEgressGatewayPolicy")
   383  		return err
   384  	}
   385  
   386  	manager.Lock()
   387  	defer manager.Unlock()
   388  
   389  	if _, ok := manager.policyConfigs[config.id]; !ok {
   390  		logger.Debug("Added CiliumEgressGatewayPolicy")
   391  	} else {
   392  		logger.Debug("Updated CiliumEgressGatewayPolicy")
   393  	}
   394  
   395  	config.updateMatchedEndpointIDs(manager.epDataStore)
   396  
   397  	manager.policyConfigs[config.id] = config
   398  
   399  	manager.setEventBitmap(eventAddPolicy)
   400  	manager.reconciliationTrigger.TriggerWithReason("policy added")
   401  	return nil
   402  }
   403  
   404  // onDeleteEgressPolicy deletes the internal state associated with the given
   405  // policy, including egress eBPF map entries.
   406  func (manager *Manager) onDeleteEgressPolicy(policy *Policy) {
   407  	configID := ParseCEGPConfigID(policy)
   408  
   409  	manager.Lock()
   410  	defer manager.Unlock()
   411  
   412  	logger := log.WithField(logfields.CiliumEgressGatewayPolicyName, configID.Name)
   413  
   414  	if manager.policyConfigs[configID] == nil {
   415  		logger.Warn("Can't delete CiliumEgressGatewayPolicy: policy not found")
   416  	}
   417  
   418  	logger.Debug("Deleted CiliumEgressGatewayPolicy")
   419  
   420  	delete(manager.policyConfigs, configID)
   421  
   422  	manager.setEventBitmap(eventDeletePolicy)
   423  	manager.reconciliationTrigger.TriggerWithReason("policy deleted")
   424  }
   425  
   426  func (manager *Manager) addEndpoint(endpoint *k8sTypes.CiliumEndpoint) error {
   427  	var epData *endpointMetadata
   428  	var err error
   429  	var identityLabels labels.Labels
   430  
   431  	manager.Lock()
   432  	defer manager.Unlock()
   433  
   434  	logger := log.WithFields(logrus.Fields{
   435  		logfields.K8sEndpointName: endpoint.Name,
   436  		logfields.K8sNamespace:    endpoint.Namespace,
   437  		logfields.K8sUID:          endpoint.UID,
   438  	})
   439  
   440  	if endpoint.Identity == nil {
   441  		logger.Warning("Endpoint is missing identity metadata, skipping update to egress policy.")
   442  		return nil
   443  	}
   444  
   445  	if identityLabels, err = manager.getIdentityLabels(uint32(endpoint.Identity.ID)); err != nil {
   446  		logger.WithError(err).
   447  			Warning("Failed to get identity labels for endpoint")
   448  		return err
   449  	}
   450  
   451  	if epData, err = getEndpointMetadata(endpoint, identityLabels); err != nil {
   452  		logger.WithError(err).
   453  			Error("Failed to get valid endpoint metadata, skipping update to egress policy.")
   454  		return nil
   455  	}
   456  
   457  	if _, ok := manager.epDataStore[epData.id]; ok {
   458  		logger.Debug("Updated CiliumEndpoint")
   459  	} else {
   460  		logger.Debug("Added CiliumEndpoint")
   461  	}
   462  
   463  	manager.epDataStore[epData.id] = epData
   464  
   465  	manager.setEventBitmap(eventUpdateEndpoint)
   466  	manager.reconciliationTrigger.TriggerWithReason("endpoint updated")
   467  
   468  	return nil
   469  }
   470  
   471  func (manager *Manager) deleteEndpoint(endpoint *k8sTypes.CiliumEndpoint) {
   472  	manager.Lock()
   473  	defer manager.Unlock()
   474  
   475  	logger := log.WithFields(logrus.Fields{
   476  		logfields.K8sEndpointName: endpoint.Name,
   477  		logfields.K8sNamespace:    endpoint.Namespace,
   478  		logfields.K8sUID:          endpoint.UID,
   479  	})
   480  
   481  	logger.Debug("Deleted CiliumEndpoint")
   482  	delete(manager.epDataStore, endpoint.UID)
   483  
   484  	manager.setEventBitmap(eventDeleteEndpoint)
   485  	manager.reconciliationTrigger.TriggerWithReason("endpoint deleted")
   486  }
   487  
   488  func (manager *Manager) handleEndpointEvent(event resource.Event[*k8sTypes.CiliumEndpoint]) {
   489  	endpoint := event.Object
   490  
   491  	if event.Kind == resource.Upsert {
   492  		event.Done(manager.addEndpoint(endpoint))
   493  	} else {
   494  		manager.deleteEndpoint(endpoint)
   495  		event.Done(nil)
   496  	}
   497  }
   498  
   499  // handleNodeEvent takes care of node upserts and removals.
   500  func (manager *Manager) handleNodeEvent(event resource.Event[*cilium_api_v2.CiliumNode]) {
   501  	defer event.Done(nil)
   502  
   503  	node := nodeTypes.ParseCiliumNode(event.Object)
   504  
   505  	manager.Lock()
   506  	defer manager.Unlock()
   507  
   508  	// Find the node if we already have it.
   509  	nidx, found := slices.BinarySearchFunc(manager.nodes, node, func(a nodeTypes.Node, b nodeTypes.Node) int {
   510  		return cmp.Compare(a.Name, b.Name)
   511  	})
   512  
   513  	if event.Kind == resource.Delete {
   514  		// Delete the node if we're aware of it.
   515  		if found {
   516  			manager.nodes = slices.Delete(manager.nodes, nidx, nidx+1)
   517  		}
   518  
   519  		manager.reconciliationTrigger.TriggerWithReason("node deleted")
   520  		return
   521  	}
   522  
   523  	// Update the node if we have it, otherwise insert in the correct
   524  	// position.
   525  	if found {
   526  		manager.nodes[nidx] = node
   527  	} else {
   528  		manager.nodes = slices.Insert(manager.nodes, nidx, node)
   529  	}
   530  
   531  	manager.reconciliationTrigger.TriggerWithReason("node updated")
   532  }
   533  
   534  func (manager *Manager) updatePoliciesMatchedEndpointIDs() {
   535  	for _, policy := range manager.policyConfigs {
   536  		policy.updateMatchedEndpointIDs(manager.epDataStore)
   537  	}
   538  }
   539  
   540  func (manager *Manager) updatePoliciesBySourceIP() {
   541  	manager.policyConfigsBySourceIP = make(map[string][]*PolicyConfig)
   542  
   543  	for _, policy := range manager.policyConfigs {
   544  		for _, ep := range policy.matchedEndpoints {
   545  			for _, epIP := range ep.ips {
   546  				ip := epIP.String()
   547  				manager.policyConfigsBySourceIP[ip] = append(manager.policyConfigsBySourceIP[ip], policy)
   548  			}
   549  		}
   550  	}
   551  }
   552  
   553  // policyMatches returns true if there exists at least one policy matching the
   554  // given parameters.
   555  //
   556  // This method takes:
   557  //   - a source IP: this is an optimization that allows to iterate only through
   558  //     policies that reference an endpoint with the given source IP
   559  //   - a callback function f: this function is invoked for each policy and for
   560  //     each combination of the policy's endpoints and destination/excludedCIDRs.
   561  //
   562  // The callback f takes as arguments:
   563  // - the given endpoint
   564  // - the destination CIDR
   565  // - a boolean value indicating if the CIDR belongs to the excluded ones
   566  // - the gatewayConfig of the  policy
   567  //
   568  // This method returns true whenever the f callback matches one of the endpoint
   569  // and CIDR tuples (i.e. whenever one callback invocation returns true)
   570  func (manager *Manager) policyMatches(sourceIP netip.Addr, f func(netip.Addr, netip.Prefix, bool, *gatewayConfig) bool) bool {
   571  	for _, policy := range manager.policyConfigsBySourceIP[sourceIP.String()] {
   572  		for _, ep := range policy.matchedEndpoints {
   573  			for _, endpointIP := range ep.ips {
   574  				if endpointIP != sourceIP {
   575  					continue
   576  				}
   577  
   578  				isExcludedCIDR := false
   579  				for _, dstCIDR := range policy.dstCIDRs {
   580  					if f(endpointIP, dstCIDR, isExcludedCIDR, &policy.gatewayConfig) {
   581  						return true
   582  					}
   583  				}
   584  
   585  				isExcludedCIDR = true
   586  				for _, excludedCIDR := range policy.excludedCIDRs {
   587  					if f(endpointIP, excludedCIDR, isExcludedCIDR, &policy.gatewayConfig) {
   588  						return true
   589  					}
   590  				}
   591  			}
   592  		}
   593  	}
   594  
   595  	return false
   596  }
   597  
   598  func (manager *Manager) regenerateGatewayConfigs() {
   599  	for _, policyConfig := range manager.policyConfigs {
   600  		policyConfig.regenerateGatewayConfig(manager)
   601  	}
   602  }
   603  
   604  func (manager *Manager) relaxRPFilter() error {
   605  	var sysSettings []tables.Sysctl
   606  	ifSet := make(map[string]struct{})
   607  
   608  	for _, pc := range manager.policyConfigs {
   609  		if !pc.gatewayConfig.localNodeConfiguredAsGateway {
   610  			continue
   611  		}
   612  
   613  		ifaceName := pc.gatewayConfig.ifaceName
   614  		if _, ok := ifSet[ifaceName]; !ok {
   615  			ifSet[ifaceName] = struct{}{}
   616  			sysSettings = append(sysSettings, tables.Sysctl{
   617  				Name:      []string{"net", "ipv4", "conf", ifaceName, "rp_filter"},
   618  				Val:       "2",
   619  				IgnoreErr: false,
   620  			})
   621  		}
   622  	}
   623  
   624  	if len(sysSettings) == 0 {
   625  		return nil
   626  	}
   627  
   628  	return manager.sysctl.ApplySettings(sysSettings)
   629  }
   630  
// addMissingEgressRules upserts into the egress policy BPF map every
// (endpoint IP, destination CIDR) pair required by the current policy
// configs that is missing from the map or present with a stale value.
func (manager *Manager) addMissingEgressRules() {
	// Snapshot the current BPF map contents so already-correct entries can
	// be skipped without touching the map.
	egressPolicies := map[egressmap.EgressPolicyKey4]egressmap.EgressPolicyVal4{}
	manager.policyMap.IterateWithCallback(
		func(key *egressmap.EgressPolicyKey4, val *egressmap.EgressPolicyVal4) {
			egressPolicies[*key] = *val
		})

	addEgressRule := func(endpointIP netip.Addr, dstCIDR netip.Prefix, excludedCIDR bool, gwc *gatewayConfig) {
		policyKey := egressmap.NewEgressPolicyKey4(endpointIP, dstCIDR)
		policyVal, policyPresent := egressPolicies[policyKey]

		// Excluded CIDRs are encoded with the sentinel ExcludedCIDRIPv4
		// gateway address so the datapath skips egress gateway for them.
		gatewayIP := gwc.gatewayIP
		if excludedCIDR {
			gatewayIP = ExcludedCIDRIPv4
		}

		// Entry already present with the desired value: nothing to do.
		if policyPresent && policyVal.Match(gwc.egressIP, gatewayIP) {
			return
		}

		logger := log.WithFields(logrus.Fields{
			logfields.SourceIP:        endpointIP,
			logfields.DestinationCIDR: dstCIDR.String(),
			logfields.EgressIP:        gwc.egressIP,
			logfields.GatewayIP:       gatewayIP,
		})

		// Log-and-continue on failure so a single bad entry does not block
		// reconciling the rest.
		if err := manager.policyMap.Update(endpointIP, dstCIDR, gwc.egressIP, gatewayIP); err != nil {
			logger.WithError(err).Error("Error applying egress gateway policy")
		} else {
			logger.Debug("Egress gateway policy applied")
		}
	}

	for _, policyConfig := range manager.policyConfigs {
		policyConfig.forEachEndpointAndCIDR(addEgressRule)
	}
}
   669  
// removeUnusedEgressRules is responsible for removing any entry in the egress policy BPF map which
// is not backed by an actual k8s CiliumEgressGatewayPolicy.
func (manager *Manager) removeUnusedEgressRules() {
	// Snapshot the map first: deleting while iterating a BPF map is unsafe.
	egressPolicies := map[egressmap.EgressPolicyKey4]egressmap.EgressPolicyVal4{}
	manager.policyMap.IterateWithCallback(
		func(key *egressmap.EgressPolicyKey4, val *egressmap.EgressPolicyVal4) {
			egressPolicies[*key] = *val
		})

	for policyKey, policyVal := range egressPolicies {
		// matchPolicy reports whether this map entry (key and value) is
		// still produced by some current policy, accounting for the
		// sentinel gateway address used for excluded CIDRs.
		matchPolicy := func(endpointIP netip.Addr, dstCIDR netip.Prefix, excludedCIDR bool, gwc *gatewayConfig) bool {
			gatewayIP := gwc.gatewayIP
			if excludedCIDR {
				gatewayIP = ExcludedCIDRIPv4
			}

			return policyKey.Match(endpointIP, dstCIDR) && policyVal.Match(gwc.egressIP, gatewayIP)
		}

		// Entry still wanted by some policy: keep it.
		if manager.policyMatches(policyKey.GetSourceIP(), matchPolicy) {
			continue
		}

		logger := log.WithFields(logrus.Fields{
			logfields.SourceIP:        policyKey.GetSourceIP(),
			logfields.DestinationCIDR: policyKey.GetDestCIDR().String(),
			logfields.EgressIP:        policyVal.GetEgressAddr(),
			logfields.GatewayIP:       policyVal.GetGatewayAddr(),
		})

		// Log-and-continue on failure so one bad entry does not block the
		// removal of the rest.
		if err := manager.policyMap.Delete(policyKey.GetSourceIP(), policyKey.GetDestCIDR()); err != nil {
			logger.WithError(err).Error("Error removing egress gateway policy")
		} else {
			logger.Debug("Egress gateway policy removed")
		}
	}
}
   707  
   708  // reconcileLocked is responsible for reconciling the state of the manager (i.e. the
   709  // desired state) with the actual state of the node (egress policy map entries).
   710  //
   711  // Whenever it encounters an error, it will just log it and move to the next
   712  // item, in order to reconcile as many states as possible.
   713  func (manager *Manager) reconcileLocked() {
   714  	if !manager.allCachesSynced {
   715  		return
   716  	}
   717  
   718  	switch {
   719  	// on eventK8sSyncDone we need to update all caches unconditionally as
   720  	// we don't know which k8s events/resources were received during the
   721  	// initial k8s sync
   722  	case manager.eventBitmapIsSet(eventUpdateEndpoint, eventDeleteEndpoint, eventK8sSyncDone):
   723  		manager.updatePoliciesMatchedEndpointIDs()
   724  		fallthrough
   725  	case manager.eventBitmapIsSet(eventAddPolicy, eventDeletePolicy):
   726  		manager.updatePoliciesBySourceIP()
   727  	}
   728  
   729  	manager.regenerateGatewayConfigs()
   730  
   731  	// Sysctl updates are handled by a reconciler, with the initial update attempting to wait some time
   732  	// for a synchronous reconciliation. Thus these updates are already resilient so in case of failure
   733  	// our best course of action is to log the error and continue with the reconciliation.
   734  	//
   735  	// The rp_filter setting is only important for traffic originating from endpoints on the same host (i.e.
   736  	// egw traffic being forwarded from a local Pod endpoint to the gateway on the same node).
   737  	// Therefore, for the sake of resiliency, it is acceptable for EGW to continue reconciling gatewayConfigs
   738  	// even if the rp_filter setting are failing.
   739  	if err := manager.relaxRPFilter(); err != nil {
   740  		log.WithError(err).Error("Error relaxing rp_filter for gateway interfaces. "+
   741  			"Selected egress gateway interfaces require rp_filter settings to use loose mode (rp_filter=2) for gateway forwarding to work correctly. ",
   742  			"This may cause connectivity issues for egress gateway traffic being forwarded through this node for Pods running on the same host. ")
   743  	}
   744  
   745  	// The order of the next 2 function calls matters, as by first adding missing policies and
   746  	// only then removing obsolete ones we make sure there will be no connectivity disruption
   747  	manager.addMissingEgressRules()
   748  	manager.removeUnusedEgressRules()
   749  
   750  	// clear the events bitmap
   751  	manager.eventsBitmap = 0
   752  
   753  	manager.reconciliationEventsCount.Add(1)
   754  }