
     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     4  package redirectpolicy
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"net"
    11  	"net/netip"
    12  	"sync"
    14  	""
    15  	""
    16  	""
    18  	agentK8s ""
    19  	cmtypes ""
    20  	""
    21  	""
    22  	""
    23  	""
    24  	slimcorev1 ""
    25  	k8sUtils ""
    26  	lb ""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	nodeTypes ""
    33  	""
    34  	""
    35  	serviceStore ""
    36  )
var (
	// log is the package logger; every entry carries the "redirectpolicy"
	// subsystem field.
	log                 = logging.DefaultLogger.WithField(logfields.LogSubsys, "redirectpolicy")
	// localRedirectSvcStr is the suffix appended when naming the lb.SVC
	// entries that upsertService creates for redirect policies.
	localRedirectSvcStr = "-local-redirect"
)
// svcManager is the subset of service-manager operations that the redirect
// policy Manager relies on.
type svcManager interface {
	// DeleteService removes the service with the given frontend. Callers in
	// this file read the boolean result as "a service was found".
	DeleteService(frontend lb.L3n4Addr) (bool, error)
	// UpsertService creates or updates the given service. Only the error
	// result is inspected in this file.
	UpsertService(*lb.SVC) (bool, lb.ID, error)
	// TerminateUDPConnectionsToBackend is invoked for each backend address
	// removed from a policy in deletePolicyBackends.
	TerminateUDPConnectionsToBackend(l3n4Addr *lb.L3n4Addr)
}
// svcCache is the subset of Kubernetes service-cache operations that the
// redirect policy Manager relies on.
type svcCache interface {
	// EnsureService is called after a redirect service has been removed; a
	// true result is logged by callers in this file as "Restored service".
	EnsureService(svcID k8s.ServiceID, swg *lock.StoppableWaitGroup) bool
	// GetServiceAddrsWithType returns the frontends of the service grouped by
	// port name, plus a count that callers use as the number of frontend IPs.
	GetServiceAddrsWithType(svcID k8s.ServiceID, svcType lb.SVCType) (map[lb.FEPortName][]*lb.L3n4Addr, int)
	// GetServiceFrontendIP returns the frontend IP of the service. Callers in
	// this file treat a nil result as "service not available yet".
	GetServiceFrontendIP(svcID k8s.ServiceID, svcType lb.SVCType) net.IP
}
// endpointManager is the subset of endpoint-manager operations that the
// redirect policy Manager relies on.
type endpointManager interface {
	// GetEndpointNetnsCookieByIP looks up the network namespace cookie of the
	// endpoint with the given IP. An error is treated by callers in this file
	// as "cookie not available yet".
	GetEndpointNetnsCookieByIP(ip netip.Addr) (uint64, error)
	// Subscribe registers s for endpoint events; the Manager passes itself.
	Subscribe(s endpointmanager.Subscriber)
}
// podID identifies a pod by name and namespace. It is an alias of
// k8s.ServiceID, so values of the two types are interchangeable.
type podID = k8s.ServiceID
// Manager manages configurations related to Local Redirect Policies
// that enable redirecting traffic from the specified frontend to a set of node-local
// backend pods selected based on the backend configuration. To do that, it keeps
// track of add/delete events for resources like LRP, Pod and Service.
// For every local redirect policy configuration, it creates a
// new lb.SVCTypeLocalRedirect service with a frontend that has at least one node-local backend.
type Manager struct {
	// Service handler to manage service entries corresponding to redirect policies
	svcManager svcManager
	// svcCache is used to look up service frontends and to restore a service
	// once its redirect entry has been removed.
	svcCache svcCache
	// localPods provides the store of local pods that policies are matched
	// against.
	localPods agentK8s.LocalPodResource
	// epManager is used to resolve pod netns cookies and to subscribe to
	// endpoint events.
	epManager endpointManager
	// skipLBMap holds the skip_lb map handle. It is created on demand in
	// AddRedirectPolicy for policies with skipRedirectFromBackend set, and is
	// nil until then.
	skipLBMap lbmap.SkipLBMap
	// Mutex to protect against concurrent access to the maps
	mutex lock.Mutex
	// Stores mapping of all the current redirect policy frontend to their
	// respective policies
	// Frontends are namespace agnostic
	policyFrontendsByHash map[string]policyID
	// Stores mapping of redirect policy serviceID to the corresponding policyID for
	// easy lookup in policyConfigs
	policyServices map[k8s.ServiceID]policyID
	// Stores mapping of pods to redirect policies that select the pods
	policyPods map[podID][]policyID
	// Stores redirect policy configs indexed by policyID
	policyConfigs map[policyID]*LRPConfig
	// Stores mapping of pod endpoints to redirect policies that select the pods
	policyEndpoints map[podID]sets.Set[policyID]
	// noNetnsCookieSupport is set in AddRedirectPolicy when querying the netns
	// cookie fails with ENOPROTOOPT.
	noNetnsCookieSupport bool
}
   101  func NewRedirectPolicyManager(svc svcManager, svcCache *k8s.ServiceCache, lpr agentK8s.LocalPodResource, epM endpointManager) *Manager {
   102  	return &Manager{
   103  		svcManager:            svc,
   104  		svcCache:              svcCache,
   105  		epManager:             epM,
   106  		localPods:             lpr,
   107  		policyFrontendsByHash: make(map[string]policyID),
   108  		policyServices:        make(map[k8s.ServiceID]policyID),
   109  		policyPods:            make(map[podID][]policyID),
   110  		policyConfigs:         make(map[policyID]*LRPConfig),
   111  		policyEndpoints:       make(map[podID]sets.Set[policyID]),
   112  	}
   113  }
   115  // Event handlers
// AddRedirectPolicy validates the given local redirect policy config, stores
// it, and applies it to the currently matching local pods.
//
// For policies with skipRedirectFromBackend set it first checks netns cookie
// support and makes sure the skip_lb map exists; if either is missing the
// policy is rejected with an error.
//
// It returns (true, nil) when the policy was stored, including when no local
// pod matches yet, and also when a policy with the same ID already exists
// (updates are not handled). It returns (false, err) when a precondition,
// validation, or applying the policy fails.
//
// NOTE(review): several expressions in this block are missing in this copy of
// the file (log field values and the policyConfigs lookup key); they are left
// exactly as found.
func (rpm *Manager) AddRedirectPolicy(config LRPConfig) (bool, error) {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	if config.skipRedirectFromBackend {
		// NOTE(review): a fresh sync.OnceValue is built and invoked on every
		// call, so the closure below (including Subscribe) runs each time a
		// policy with skipRedirectFromBackend is added, not once per Manager.
		// Confirm whether that is intended.
		rpm.noNetnsCookieSupport = sync.OnceValue[bool](func() bool {
			// ENOPROTOOPT from the cookie query means the kernel lacks
			// SO_NETNS_COOKIE.
			if _, err := netns.GetNetNSCookie(); errors.Is(err, unix.ENOPROTOOPT) {
				return true
			}
			rpm.epManager.Subscribe(rpm)
			if rpm.skipLBMap == nil {
				var err error
				rpm.skipLBMap, err = lbmap.NewSkipLBMap()
				if err != nil {
					log.WithError(err).Warn("failed to init cilium_skip_lb maps: " +
						"policies with skipRedirectFromBackend flag set not supported")
				}
			}
			return false
		})()
		if rpm.noNetnsCookieSupport {
			err := fmt.Errorf("policy with skipRedirectFromBackend flag set not applied" +
				":SO_NETNS_COOKIE not supported. Needs kernel version >= 5.8")
			log.WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace:,
				logfields.LRPName:,
			}).Error(err)
			return false, err
		}
		if rpm.skipLBMap == nil {
			err := fmt.Errorf("policy with skipRedirectFromBackend flag set not applied:" +
				"requires cilium_skip_lb maps")
			log.WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace:,
				logfields.LRPName:,
			}).Error(err)
			return false, err
		}
	}
	// An already-stored policy is left untouched and reported as success.
	_, ok := rpm.policyConfigs[]
	if ok {
		// TODO Existing policy update
		log.Warn("Local redirect policy updates are not handled")
		return true, nil
	}
	err := rpm.isValidConfig(config)
	if err != nil {
		return false, err
	}
	// New redirect policy
	rpm.storePolicyConfig(config)
	switch config.lrpType {
	case lrpConfigTypeAddr:
		log.WithFields(logrus.Fields{
			logfields.LRPType:                  config.lrpType,
			logfields.K8sNamespace:   ,
			logfields.LRPName:        ,
			logfields.LRPFrontends:             config.frontendMappings,
			logfields.LRPLocalEndpointSelector: config.backendSelector,
			logfields.LRPBackendPorts:          config.backendPorts,
			logfields.LRPFrontendType:          config.frontendType,
		}).Debug("Add local redirect policy")
		pods, err := rpm.getLocalPodsForPolicy(&config)
		if err != nil {
			return false, err
		}
		// No matching local pod yet: the policy stays stored and is applied
		// from later pod events.
		if len(pods) == 0 {
			return true, nil
		}
		rpm.processConfig(&config, pods...)
	case lrpConfigTypeSvc:
		log.WithFields(logrus.Fields{
			logfields.LRPType:                  config.lrpType,
			logfields.K8sNamespace:   ,
			logfields.LRPName:        ,
			logfields.K8sSvcID:                 config.serviceID,
			logfields.LRPFrontends:             config.frontendMappings,
			logfields.LRPLocalEndpointSelector: config.backendSelector,
			logfields.LRPBackendPorts:          config.backendPorts,
			logfields.LRPFrontendType:          config.frontendType,
		}).Debug("Add local redirect policy")
		err := rpm.getAndUpsertPolicySvcConfig(&config)
		if err != nil {
			return false, err
		}
	}
	return true, nil
}
// DeleteRedirectPolicy deletes the internal state associated with the given policy.
//
// It removes the policy's frontends (or its service state for service-type
// policies), drops the policy from every pod's policy list in policyPods, and
// finally removes the stored config. It returns an error when no stored config
// is found for the policy.
//
// NOTE(review): policyEndpoints is not pruned here, so it may keep IDs of
// deleted policies.
// NOTE(review): the policyConfigs lookup key, the "policyID" log value and the
// right-hand side of the policy comparison are missing in this copy of the
// file; they are left exactly as found.
func (rpm *Manager) DeleteRedirectPolicy(config LRPConfig) error {
	rpm.mutex.Lock()
	defer rpm.mutex.Unlock()
	storedConfig := rpm.policyConfigs[]
	if storedConfig == nil {
		return fmt.Errorf("local redirect policy to be deleted not found")
	}
	log.WithFields(logrus.Fields{"policyID":}).
		Debug("Delete local redirect policy")
	switch storedConfig.lrpType {
	case lrpConfigTypeSvc:
		rpm.deletePolicyService(storedConfig)
	case lrpConfigTypeAddr:
		for _, feM := range storedConfig.frontendMappings {
			rpm.deletePolicyFrontend(storedConfig, feM.feAddr)
		}
	}
	// Rebuild each pod's policy list without this policy; pods left with no
	// policies are dropped from the map.
	for p, pp := range rpm.policyPods {
		var newPolicyList []policyID
		for _, policy := range pp {
			if policy != {
				newPolicyList = append(newPolicyList, policy)
			}
		}
		if len(newPolicyList) > 0 {
			rpm.policyPods[p] = newPolicyList
		} else {
			delete(rpm.policyPods, p)
		}
	}
	rpm.deletePolicyConfig(storedConfig)
	return nil
}
   256  // OnAddService handles Kubernetes service (clusterIP type) add events, and
   257  // updates the internal state for the policy config associated with the service.
   258  func (rpm *Manager) OnAddService(svcID k8s.ServiceID) {
   259  	rpm.mutex.Lock()
   260  	defer rpm.mutex.Unlock()
   261  	if len(rpm.policyConfigs) == 0 {
   262  		return
   263  	}
   265  	// Check if this service is selected by any of the current policies.
   266  	if id, ok := rpm.policyServices[svcID]; ok {
   267  		// TODO Add unit test to assert lrpConfigType among other things.
   268  		config := rpm.policyConfigs[id]
   269  		if !config.checkNamespace(svcID.Namespace) {
   270  			return
   271  		}
   272  		rpm.getAndUpsertPolicySvcConfig(config)
   273  	}
   274  }
   276  // OnDeleteService handles Kubernetes service deletes, and deletes the internal state
   277  // for the policy config that might be associated with the service.
   278  func (rpm *Manager) OnDeleteService(svcID k8s.ServiceID) {
   279  	rpm.mutex.Lock()
   280  	defer rpm.mutex.Unlock()
   281  	if len(rpm.policyConfigs) == 0 {
   282  		return
   283  	}
   285  	rpm.deleteService(svcID)
   286  }
   288  func (rpm *Manager) OnAddPod(pod *slimcorev1.Pod) {
   289  	rpm.mutex.Lock()
   290  	defer rpm.mutex.Unlock()
   292  	if len(rpm.policyConfigs) == 0 {
   293  		return
   294  	}
   295  	// If the pod already exists in the internal cache, ignore all the subsequent
   296  	// events since they'll be handled in the OnUpdatePod callback.
   297  	// GH issue #13136
   298  	// TODO add unit test
   299  	id := k8s.ServiceID{
   300  		Name:      pod.GetName(),
   301  		Namespace: pod.GetNamespace(),
   302  	}
   303  	if _, ok := rpm.policyPods[id]; ok {
   304  		return
   305  	}
   306  	rpm.OnUpdatePodLocked(pod, false, true)
   307  }
// OnUpdatePodLocked applies a pod update to the stored policies. It does not
// take rpm.mutex itself; the callers in this file hold it.
//
// When removeOld is true, the pod's backends are removed from every policy
// that previously selected it. When upsertNew is true, the pod is matched
// against all stored policies: policies without skipRedirectFromBackend are
// processed right away, the others are processed once the pod's netns cookie
// is known, and otherwise recorded in policyEndpoints so they can be applied
// from endpoint events.
//
// NOTE(review): the argument of pendingPolicies.Insert is missing in this copy
// of the file; it is left exactly as found.
func (rpm *Manager) OnUpdatePodLocked(pod *slimcorev1.Pod, removeOld bool, upsertNew bool) {
	if len(rpm.policyConfigs) == 0 {
		return
	}
	id := podID{
		Name:      pod.GetName(),
		Namespace: pod.GetNamespace(),
	}
	if removeOld {
		// Check if the pod was previously selected by any of the policies.
		if policies, ok := rpm.policyPods[id]; ok {
			for _, policy := range policies {
				config := rpm.policyConfigs[policy]
				rpm.deletePolicyBackends(config, id)
			}
		}
	}
	if !upsertNew {
		return
	}
	var podData *podMetadata
	pendingPolicies := sets.New[policyID]()
	// Check if any of the current redirect policies select this pod.
	for _, config := range rpm.policyConfigs {
		if config.checkNamespace(pod.GetNamespace()) && config.policyConfigSelectsPod(pod) {
			// Missing pod metadata aborts the whole update, not just this
			// policy.
			if podData = rpm.getPodMetadata(pod); podData == nil {
				return
			}
			if !config.skipRedirectFromBackend {
				rpm.processConfig(config, podData)
				continue
			}
		}
	}
	// podData stays nil when no policy selected the pod.
	if podData == nil {
		return
	}
	// Process redirect policies that need additional pod metadata.
	// NOTE(review): this loop visits every policy with skipRedirectFromBackend
	// set and does not re-check that the policy selects this pod — confirm.
	for _, config := range rpm.policyConfigs {
		if !config.skipRedirectFromBackend {
			continue
		}
		if podData.netnsCookie != 0 {
			rpm.processConfig(config, podData)
			continue
		}
		// NOTE(review): assumes podData.ips is non-empty; the ParseAddr error
		// is discarded.
		addr, _ := netip.ParseAddr(podData.ips[0])
		cookie, err := rpm.epManager.GetEndpointNetnsCookieByIP(addr)
		if err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				"addr": addr,
			}).Debug("Track pod for endpoint metadata")
			// Netns cookie not available yet.
			// Track the pod for this policy in order to retrieve metadata via endpoint events.
			pendingPolicies.Insert(
			continue
		}
		log.WithFields(logrus.Fields{
			logfields.K8sPodName:  pod.Name,
			logfields.NetnsCookie: cookie,
		}).Debug("Pod endpoint netNsCookie")
		// NOTE(review): the cookie is recorded but processConfig is not called
		// for this policy in this iteration; only policies visited afterwards
		// take the netnsCookie != 0 branch above — confirm.
		podData.netnsCookie = cookie
	}
	if len(pendingPolicies) > 0 {
		rpm.policyEndpoints[id] = pendingPolicies
	}
}
   380  func (rpm *Manager) OnUpdatePod(pod *slimcorev1.Pod, needsReassign bool, ready bool) {
   381  	rpm.mutex.Lock()
   382  	defer rpm.mutex.Unlock()
   383  	// TODO add unit test to validate that we get callbacks only for relevant events
   384  	rpm.OnUpdatePodLocked(pod, needsReassign || !ready, ready)
   385  }
   387  func (rpm *Manager) OnDeletePod(pod *slimcorev1.Pod) {
   388  	rpm.mutex.Lock()
   389  	defer rpm.mutex.Unlock()
   390  	if len(rpm.policyConfigs) == 0 {
   391  		return
   392  	}
   393  	id := k8s.ServiceID{
   394  		Name:      pod.GetName(),
   395  		Namespace: pod.GetNamespace(),
   396  	}
   398  	if policies, ok := rpm.policyPods[id]; ok {
   399  		for _, policy := range policies {
   400  			config := rpm.policyConfigs[policy]
   401  			rpm.deletePolicyBackends(config, id)
   402  		}
   403  		delete(rpm.policyPods, id)
   404  	}
   405  	delete(rpm.policyEndpoints, id)
   406  }
   408  func (rpm *Manager) EndpointCreated(ep *endpoint.Endpoint) {
   409  	podID := k8s.ServiceID{
   410  		Name:      ep.GetK8sPodName(),
   411  		Namespace: ep.GetK8sNamespace(),
   412  	}
   413  	rpm.mutex.Lock()
   414  	defer rpm.mutex.Unlock()
   415  	if policyIDs, found := rpm.policyEndpoints[podID]; found {
   416  		for _, id := range policyIDs.UnsortedList() {
   417  			config := rpm.policyConfigs[id]
   418  			// Track policies that need additional pod metadata for applying the policies.
   419  			if !config.skipRedirectFromBackend {
   420  				continue
   421  			}
   422  			podStore, _ := rpm.localPods.Store(context.TODO())
   423  			pod, exists, err := podStore.GetByKey(resource.Key{Name: podID.Name, Namespace: podID.Namespace})
   424  			if err != nil || !exists {
   425  				return
   426  			}
   427  			if k8sUtils.GetLatestPodReadiness(pod.Status) != slimcorev1.ConditionTrue {
   428  				return
   429  			}
   430  			podData := rpm.getPodMetadata(pod)
   431  			if podData == nil {
   432  				// This is a sanity check in case pod data isn't available yet.
   433  				return
   434  			}
   435  			podData.netnsCookie = ep.NetNsCookie
   436  			log.WithFields(logrus.Fields{
   437  				"podID":               podID,
   438  				logfields.NetnsCookie: ep.NetNsCookie,
   439  			}).Debug("Endpoint event metadata")
   440  			rpm.processConfig(config, podData)
   441  		}
   442  	}
   443  }
// EndpointDeleted handles an endpoint deletion event. It intentionally does
// nothing and ignores both arguments.
func (rpm *Manager) EndpointDeleted(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) {
	// No-op as clean-up is done in corresponding pod events.
}
// EndpointRestored handles an endpoint restore event by treating it exactly
// like an endpoint creation.
func (rpm *Manager) EndpointRestored(ep *endpoint.Endpoint) {
	rpm.EndpointCreated(ep)
}
// podMetadata stores relevant metadata associated with a pod that's updated during pod
// add/update events
type podMetadata struct {
	// labels are the pod's labels.
	// NOTE(review): presumably used for backend selector matching — the
	// consumer is not visible in this part of the file.
	labels map[string]string
	// id the pod's name and namespace
	id podID
	// ips are pod's unique IPs
	// They are kept as strings and parsed where needed; the first entry is
	// used to look up the pod's netns cookie.
	ips []string
	// namedPorts stores pod port and protocol indexed by the port name
	namedPorts serviceStore.PortConfiguration
	// netnsCookie is the network namespace cookie
	// Code in this file treats 0 as "not known yet".
	netnsCookie uint64
}
   467  // Note: Following functions need to be called with the redirect policy manager lock.
   469  // getAndUpsertPolicySvcConfig gets service frontends for the given config service
   470  // and upserts the service frontends.
   471  func (rpm *Manager) getAndUpsertPolicySvcConfig(config *LRPConfig) error {
   472  	switch config.frontendType {
   473  	case svcFrontendAll:
   474  		// Get all the service frontends.
   475  		addrsByPort, feIPsCount := rpm.svcCache.GetServiceAddrsWithType(*config.serviceID,
   476  			lb.SVCTypeClusterIP)
   477  		config.frontendMappings = make([]*feMapping, 0, len(addrsByPort)*feIPsCount)
   478  		for p, addrs := range addrsByPort {
   479  			for _, addr := range addrs {
   480  				feM := &feMapping{
   481  					feAddr: addr,
   482  					fePort: string(p),
   483  				}
   484  				config.frontendMappings = append(config.frontendMappings, feM)
   485  			}
   486  			rpm.updateConfigSvcFrontend(config, addrs...)
   487  		}
   489  	case svcFrontendSinglePort:
   490  		// Get service frontend with the clusterIP and the policy config (unnamed) port.
   491  		ip := rpm.svcCache.GetServiceFrontendIP(*config.serviceID, lb.SVCTypeClusterIP)
   492  		if ip == nil {
   493  			// The LRP will be applied when the selected service is added later.
   494  			return nil
   495  		}
   496  		addrCluster := cmtypes.MustAddrClusterFromIP(ip)
   497  		config.frontendMappings[0].feAddr.AddrCluster = addrCluster
   498  		rpm.updateConfigSvcFrontend(config, config.frontendMappings[0].feAddr)
   500  	case svcFrontendNamedPorts:
   501  		// Get service frontends with the clusterIP and the policy config named ports.
   502  		ports := make([]string, len(config.frontendMappings))
   503  		for i, mapping := range config.frontendMappings {
   504  			ports[i] = mapping.fePort
   505  		}
   506  		ip := rpm.svcCache.GetServiceFrontendIP(*config.serviceID, lb.SVCTypeClusterIP)
   507  		if ip == nil {
   508  			// The LRP will be applied when the selected service is added later.
   509  			return nil
   510  		}
   511  		addrCluster := cmtypes.MustAddrClusterFromIP(ip)
   512  		for _, feM := range config.frontendMappings {
   513  			feM.feAddr.AddrCluster = addrCluster
   514  			rpm.updateConfigSvcFrontend(config, feM.feAddr)
   515  		}
   516  	}
   518  	pods, err := rpm.getLocalPodsForPolicy(config)
   519  	if err != nil {
   520  		return err
   521  	}
   522  	if len(pods) > 0 {
   523  		rpm.processConfig(config, pods...)
   524  	}
   525  	return nil
   526  }
// storePolicyConfig stores various state for the given policy config.
//
// A pointer to the function's own copy of config is stored in policyConfigs.
// Address-type policies additionally get one policyFrontendsByHash entry per
// frontend mapping; service-type policies get a policyServices entry for
// their service.
//
// NOTE(review): the policyConfigs key and the values assigned to
// policyFrontendsByHash / policyServices are missing in this copy of the
// file; they are left exactly as found.
func (rpm *Manager) storePolicyConfig(config LRPConfig) {
	rpm.policyConfigs[] = &config
	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			rpm.policyFrontendsByHash[feM.feAddr.Hash()] =
		}
	case lrpConfigTypeSvc:
		rpm.policyServices[*config.serviceID] =
	}
}
// deletePolicyConfig cleans up stored state for the given policy config.
//
// It removes the policy's frontend hashes (address-type) or its service
// entry (service-type), and then the policy's entry in policyConfigs.
//
// NOTE(review): the key passed to the final delete is missing in this copy of
// the file; it is left exactly as found.
func (rpm *Manager) deletePolicyConfig(config *LRPConfig) {
	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			delete(rpm.policyFrontendsByHash, feM.feAddr.Hash())
		}
	case lrpConfigTypeSvc:
		delete(rpm.policyServices, *config.serviceID)
	}
	delete(rpm.policyConfigs,
}
// updateConfigSvcFrontend records the given frontends in
// policyFrontendsByHash, keyed by frontend hash, and stores config in
// policyConfigs.
//
// NOTE(review): the value assigned per frontend and the policyConfigs key are
// missing in this copy of the file; they are left exactly as found.
func (rpm *Manager) updateConfigSvcFrontend(config *LRPConfig, frontends ...*frontend) {
	for _, f := range frontends {
		rpm.policyFrontendsByHash[f.Hash()] =
	}
	rpm.policyConfigs[] = config
}
   562  func (rpm *Manager) deletePolicyBackends(config *LRPConfig, podID podID) {
   563  	l3nL4Addrs := sets.New[*lb.L3n4Addr]()
   565  	for _, fe := range config.frontendMappings {
   566  		newBes := make([]backend, 0, len(fe.podBackends))
   567  		for _, be := range fe.podBackends {
   568  			// Remove the pod from the frontend's backends slice, keeping the
   569  			// order same.
   570  			if be.podID != podID {
   571  				newBes = append(newBes, be)
   572  				continue
   573  			}
   574  			if config.skipRedirectFromBackend {
   575  				if be.AddrCluster.Is4() {
   576  					rpm.skipLBMap.DeleteLB4ByNetnsCookie(be.podNetnsCookie)
   577  				} else {
   578  					rpm.skipLBMap.DeleteLB6ByNetnsCookie(be.podNetnsCookie)
   579  				}
   580  			}
   581  			l3nL4Addrs.Insert(&be.L3n4Addr)
   582  		}
   583  		fe.podBackends = newBes
   584  		rpm.notifyPolicyBackendDelete(config, fe)
   585  	}
   586  	for _, addr := range l3nL4Addrs.UnsortedList() {
   587  		rpm.svcManager.TerminateUDPConnectionsToBackend(addr)
   588  	}
   589  }
// deletePolicyFrontend deletes the service entry for the specified frontend
// and drops the frontend from policyFrontendsByHash. A missing service or a
// delete error is only logged at debug level. For policies with
// skipRedirectFromBackend set, the skip_lb entries for the frontend's address
// and port are deleted as well.
//
// NOTE(review): the trailing argument and closing parenthesis of the Debugf
// call are missing in this copy of the file; they are left exactly as found.
func (rpm *Manager) deletePolicyFrontend(config *LRPConfig, frontend *frontend) {
	found, err := rpm.svcManager.DeleteService(*frontend)
	// The hash is dropped regardless of the delete outcome.
	delete(rpm.policyFrontendsByHash, frontend.Hash())
	if !found || err != nil {
		log.WithError(err).Debugf("Local redirect service for policy %v not deleted",
	}
	if config.skipRedirectFromBackend {
		// Delete skip_lb map entries.
		addr := frontend.AddrCluster
		if addr.Is4() {
			rpm.skipLBMap.DeleteLB4ByAddrPort(addr.AsNetIP(), frontend.Port)
		} else {
			rpm.skipLBMap.DeleteLB6ByAddrPort(addr.AsNetIP(), frontend.Port)
		}
	}
}
// notifyPolicyBackendDelete updates the service manager with the new set of
// backends now configured in frontendMapping.
//
// If backends remain, the service is upserted with them. Otherwise the
// service entry is deleted (a missing service or delete error is logged at
// error level) and, for service-type policies, the service cache is asked to
// ensure the original service.
//
// NOTE(review): the first format argument of the Errorf call is missing in
// this copy of the file; it is left exactly as found.
func (rpm *Manager) notifyPolicyBackendDelete(config *LRPConfig, frontendMapping *feMapping) {
	if len(frontendMapping.podBackends) > 0 {
		rpm.upsertService(config, frontendMapping)
	} else {
		// No backends so remove the service entry.
		found, err := rpm.svcManager.DeleteService(*frontendMapping.feAddr)
		if !found || err != nil {
			log.WithError(err).Errorf("Local redirect service for policy (%v)"+
				" with frontend (%v) not deleted",, frontendMapping.feAddr)
		}
		if config.lrpType == lrpConfigTypeSvc {
			if restored := rpm.svcCache.EnsureService(*config.serviceID, lock.NewStoppableWaitGroup()); restored {
				log.WithFields(logrus.Fields{
					logfields.K8sSvcID: *config.serviceID,
				}).Info("Restored service")
			}
		}
	}
}
   631  // deletePolicyService deletes internal state associated with the specified service.
   632  func (rpm *Manager) deletePolicyService(config *LRPConfig) {
   633  	for _, m := range config.frontendMappings {
   634  		rpm.deletePolicyFrontend(config, m.feAddr)
   635  	}
   636  	switch config.frontendType {
   637  	case svcFrontendAll:
   638  		config.frontendMappings = nil
   639  	case svcFrontendSinglePort:
   640  		fallthrough
   641  	case svcFrontendNamedPorts:
   642  		for _, feM := range config.frontendMappings {
   643  			feM.feAddr.AddrCluster = cmtypes.AddrCluster{}
   644  		}
   645  	}
   646  	// Retores the svc backends if there's still such a k8s svc.
   647  	swg := lock.NewStoppableWaitGroup()
   648  	svcID := *config.serviceID
   649  	if restored := rpm.svcCache.EnsureService(svcID, swg); restored {
   650  		log.WithFields(logrus.Fields{
   651  			logfields.K8sSvcID: svcID,
   652  		}).Debug("Restored service")
   653  	}
   654  }
   656  func (rpm *Manager) deleteService(svcID k8s.ServiceID) {
   657  	var (
   658  		rp policyID
   659  		ok bool
   660  	)
   661  	if rp, ok = rpm.policyServices[svcID]; !ok {
   662  		return
   663  	}
   664  	// Get the policy config that selects this service.
   665  	config := rpm.policyConfigs[rp]
   666  	for _, m := range config.frontendMappings {
   667  		rpm.deletePolicyFrontend(config, m.feAddr)
   668  	}
   669  	switch config.frontendType {
   670  	case svcFrontendAll:
   671  		config.frontendMappings = nil
   672  	case svcFrontendSinglePort:
   673  		fallthrough
   674  	case svcFrontendNamedPorts:
   675  		for _, feM := range config.frontendMappings {
   676  			feM.feAddr.AddrCluster = cmtypes.AddrCluster{}
   677  		}
   678  	}
   679  }
   681  func (rpm *Manager) plumbSkipLBEntries(mapping *feMapping) error {
   682  	if rpm.skipLBMap == nil {
   683  		// We have early checks for the maps, so this is just for a sanity check.
   684  		return fmt.Errorf("failed to plumb skip LB entries")
   685  	}
   686  	for _, pb := range mapping.podBackends {
   687  		if pb.podNetnsCookie == 0 {
   688  			return fmt.Errorf("no valid pod netns cookie")
   689  		}
   690  		addr := mapping.feAddr
   691  		if addr.AddrCluster.Is4() {
   692  			if err := rpm.skipLBMap.AddLB4(pb.podNetnsCookie, addr.AddrCluster.AsNetIP(), addr.Port); err != nil {
   693  				return fmt.Errorf("failed to add entry to skip_lb4 map: %w", err)
   694  			}
   695  		} else {
   696  			if err := rpm.skipLBMap.AddLB6(pb.podNetnsCookie, addr.AddrCluster.AsNetIP(), addr.Port); err != nil {
   697  				return fmt.Errorf("failed to add entry to skip_lb6 map: %w", err)
   698  			}
   699  		}
   700  	}
   702  	return nil
   703  }
// upsertPolicyMapping applies a single frontend mapping of the policy. For
// policies with skipRedirectFromBackend set, the skip_lb entries are plumbed
// first; if that fails the error is logged and the service is not upserted.
//
// NOTE(review): two log field values are missing in this copy of the file;
// they are left exactly as found.
func (rpm *Manager) upsertPolicyMapping(config *LRPConfig, feMapping *feMapping) {
	if config.skipRedirectFromBackend {
		if err := rpm.plumbSkipLBEntries(feMapping); err != nil {
			log.WithError(err).WithFields(logrus.Fields{
				logfields.LRPType:      config.lrpType,
				logfields.K8sNamespace:,
				logfields.LRPName:,
			}).Error("LRP not applied due to error in plumbing skip_lb map")
			return
		}
	}
	rpm.upsertService(config, feMapping)
}
// upsertService upserts a service entry for the given policy config that's ready.
//
// The service is of type lb.SVCTypeLocalRedirect, uses the mapping's frontend
// address with ID 0, and lists one backend per pod backend of the mapping,
// each tagged with the local node name. Upsert errors are only logged: at
// debug level when the error matches the "local redirect service exists"
// error for this frontend and name, at error level otherwise.
//
// NOTE(review): parts of the service Name and Namespace expressions are
// missing in this copy of the file; they are left exactly as found.
func (rpm *Manager) upsertService(config *LRPConfig, frontendMapping *feMapping) {
	frontendAddr := lb.L3n4AddrID{
		L3n4Addr: *frontendMapping.feAddr,
		ID:       lb.ID(0),
	}
	backendAddrs := make([]*lb.Backend, 0, len(frontendMapping.podBackends))
	for _, be := range frontendMapping.podBackends {
		backendAddrs = append(backendAddrs, &lb.Backend{
			NodeName: nodeTypes.GetName(),
			L3n4Addr: be.L3n4Addr,
		})
	}
	p := &lb.SVC{
		Name: lb.ServiceName{
			Name: + localRedirectSvcStr,
			Namespace:,
		},
		Type:             lb.SVCTypeLocalRedirect,
		Frontend:         frontendAddr,
		Backends:         backendAddrs,
		ExtTrafficPolicy: lb.SVCTrafficPolicyCluster,
		IntTrafficPolicy: lb.SVCTrafficPolicyCluster,
	}
	if _, _, err := rpm.svcManager.UpsertService(p); err != nil {
		if errors.Is(err, service.NewErrLocalRedirectServiceExists(p.Frontend, p.Name)) {
			log.WithError(err).Debug("Error while inserting service in LB map")
		} else {
			log.WithError(err).Error("Error while inserting service in LB map")
		}
	}
}
// getLocalPodsForPolicy returns metadata for the local pods that are selected
// by the given policy config.
//
// A pod is included only if it passes the policy's namespace check, is
// selected by the policy, has metadata available, and is ready. For policies
// with skipRedirectFromBackend set, the pod's netns cookie is also required:
// pods whose cookie cannot be resolved yet are left out of the result and
// recorded in policyEndpoints so they can be handled from endpoint events.
//
// An error is returned only when the local pod store cannot be obtained.
//
// NOTE(review): the arguments of pp.Has, pp.Insert and sets.New are missing
// in this copy of the file; they are left exactly as found.
func (rpm *Manager) getLocalPodsForPolicy(config *LRPConfig) ([]*podMetadata, error) {
	var (
		retPods []*podMetadata
		podData *podMetadata
		err     error
	)
	podStore, err := rpm.localPods.Store(context.TODO())
	if err != nil {
		log.WithError(err).Error("failed to get reference to local pod store")
		return nil, err
	}
	for _, pod := range podStore.List() {
		if !config.checkNamespace(pod.GetNamespace()) {
			continue
		}
		if !config.policyConfigSelectsPod(pod) {
			continue
		}
		if podData = rpm.getPodMetadata(pod); podData == nil {
			continue
		}
		if k8sUtils.GetLatestPodReadiness(pod.Status) != slimcorev1.ConditionTrue {
			continue
		}
		// The policy needs additional pod metadata.
		if config.skipRedirectFromBackend {
			// NOTE(review): assumes podData.ips is non-empty; the ParseAddr
			// error is discarded.
			addr, _ := netip.ParseAddr(podData.ips[0])
			cookie, err := rpm.epManager.GetEndpointNetnsCookieByIP(addr)
			if err != nil {
				log.WithError(err).WithFields(logrus.Fields{
					"addr": addr,
				}).Debug("Track pod for endpoint metadata")
				// Netns cookie not available yet.
				// Track the pod for this policy in order to retrieve metadata via endpoint events.
				podID := k8s.ServiceID{
					Name:      pod.GetName(),
					Namespace: pod.GetNamespace(),
				}
				pp, ok := rpm.policyEndpoints[podID]
				if ok {
					if !pp.Has( {
						pp.Insert(
					}
				} else {
					rpm.policyEndpoints[podID] = sets.New(
				}
				continue
			}
			log.WithFields(logrus.Fields{
				logfields.K8sPodName:  pod.Name,
				logfields.NetnsCookie: cookie,
			}).Debug("Pod endpoint netNsCookie")
			podData.netnsCookie = cookie
		}
		retPods = append(retPods, podData)
	}
	return retPods, nil
}
// isValidConfig validates the given policy config for duplicates.
// Note: The config is already sanitized.
//
// For address-type policies each frontend is looked up by hash in
// policyFrontendsByHash; for service-type policies the service is looked up
// in policyServices. A conflicting existing entry yields an error; otherwise
// nil is returned.
//
// NOTE(review): the left-hand operands of the comparisons and the trailing
// arguments of both Errorf calls are missing in this copy of the file, so the
// exact conflict conditions cannot be read from here; the code is left
// exactly as found.
func (rpm *Manager) isValidConfig(config LRPConfig) error {
	switch config.lrpType {
	case lrpConfigTypeAddr:
		for _, feM := range config.frontendMappings {
			fe := feM.feAddr
			id, ok := rpm.policyFrontendsByHash[fe.Hash()]
			if ok && != id.Name {
				return fmt.Errorf("CiliumLocalRedirectPolicy for"+
					"frontend %v already exists : %v", fe,
			}
		}
	case lrpConfigTypeSvc:
		p, ok := rpm.policyServices[*config.serviceID]
		// Only 1 serviceMatcher policy is allowed for a service name within a namespace.
		if ok && != "" &&
== rpm.policyConfigs[p].id.Namespace {
			return fmt.Errorf("CiliumLocalRedirectPolicy for"+
				" service %v already exists in namespace %v", config.serviceID,
		}
	}
	return nil
}
   844  func (rpm *Manager) processConfig(config *LRPConfig, pods ...*podMetadata) {
   845  	if config.lrpType == lrpConfigTypeSvc && len(config.frontendMappings) == 0 {
   846  		// Frontend information will be available when the selected service is added.
   847  		return
   848  	}
   849  	switch config.frontendType {
   850  	case svcFrontendSinglePort:
   851  		fallthrough
   852  	case addrFrontendSinglePort:
   853  		rpm.processConfigWithSinglePort(config, pods...)
   854  	case svcFrontendNamedPorts:
   855  		fallthrough
   856  	case addrFrontendNamedPorts:
   857  		rpm.processConfigWithNamedPorts(config, pods...)
   858  	case svcFrontendAll:
   859  		if len(config.frontendMappings) > 1 {
   860  			// The retrieved service frontend has multiple ports, in which case
   861  			// Kubernetes mandates that the ports be named.
   862  			rpm.processConfigWithNamedPorts(config, pods...)
   863  		} else {
   864  			// The retrieved service frontend has only 1 port, in which case
   865  			// port names are optional.
   866  			rpm.processConfigWithSinglePort(config, pods...)
   867  		}
   868  	}
   869  }
   871  // processConfigWithSinglePort upserts a policy config frontend with the corresponding
   872  // backends.
   873  // Frontend <ip, port, protocol> is mapped to backend <ip, port, protocol> entry.
   874  // If a pod has multiple IPs, then there will be multiple backend entries created
   875  // for the pod with common <port, protocol>.
   876  func (rpm *Manager) processConfigWithSinglePort(config *LRPConfig, pods ...*podMetadata) {
   877  	var bes4 []backend
   878  	var bes6 []backend
   880  	// Generate and map pod backends to the policy frontend. The policy config
   881  	// is already sanitized, and has matching backend and frontend port protocol.
   882  	// We currently don't check which backends are updated before upserting a
   883  	// a service with the corresponding frontend. This can be optimized when LRPs
   884  	// are scaled up.
   885  	bePort := config.backendPorts[0]
   886  	feM := config.frontendMappings[0]
   887  	for _, pod := range pods {
   888  		for _, ip := range pod.ips {
   889  			beIP := net.ParseIP(ip)
   890  			if beIP == nil {
   891  				continue
   892  			}
   893  			be := backend{
   894  				lb.L3n4Addr{
   895  					AddrCluster: cmtypes.MustParseAddrCluster(ip),
   896  					L4Addr: lb.L4Addr{
   897  						Protocol: bePort.l4Addr.Protocol,
   898  						Port:     bePort.l4Addr.Port,
   899  					},
   900  				},, pod.netnsCookie,
   901  			}
   902  			if feM.feAddr.AddrCluster.Is4() && be.AddrCluster.Is4() {
   903  				if option.Config.EnableIPv4 {
   904  					bes4 = append(bes4, be)
   905  				}
   906  			} else if feM.feAddr.AddrCluster.Is6() && be.AddrCluster.Is6() {
   907  				if option.Config.EnableIPv6 {
   908  					bes6 = append(bes6, be)
   909  				}
   910  			}
   911  		}
   912  		if len(bes4) > 0 {
   913  			rpm.updateFrontendMapping(config, feM,, bes4)
   914  		} else if len(bes6) > 0 {
   915  			rpm.updateFrontendMapping(config, feM,, bes6)
   916  		}
   917  	}
   918  	rpm.upsertPolicyMapping(config, feM)
   919  }
   921  // processConfigWithNamedPorts upserts policy config frontends to the corresponding
   922  // backends matched by port names.
   923  func (rpm *Manager) processConfigWithNamedPorts(config *LRPConfig, pods ...*podMetadata) {
   924  	// Generate backends for the policy config's backend named ports, and then
   925  	// map the backends to policy frontends based on the named ports.
   926  	// We currently don't check which backends are updated before upserting a
   927  	// a service with the corresponding frontend. This can be optimized if LRPs
   928  	// are scaled up.
   929  	upsertFes := make([]*feMapping, 0, len(config.frontendMappings))
   930  	for _, feM := range config.frontendMappings {
   931  		namedPort := feM.fePort
   932  		var (
   933  			bes4   []backend
   934  			bes6   []backend
   935  			bePort *bePortInfo
   936  			ok     bool
   937  		)
   938  		if bePort, ok = config.backendPortsByPortName[namedPort]; !ok {
   939  			// The frontend named port not found in the backend ports map.
   940  			continue
   941  		}
   942  		if bePort.l4Addr.Protocol != feM.feAddr.Protocol {
   943  			continue
   944  		}
   945  		for _, pod := range pods {
   946  			if _, ok = pod.namedPorts[namedPort]; ok {
   947  				// Generate pod backends.
   948  				for _, ip := range pod.ips {
   949  					beIP := net.ParseIP(ip)
   950  					if beIP == nil {
   951  						continue
   952  					}
   953  					be := backend{
   954  						lb.L3n4Addr{
   955  							AddrCluster: cmtypes.MustParseAddrCluster(ip),
   956  							L4Addr: lb.L4Addr{
   957  								Protocol: bePort.l4Addr.Protocol,
   958  								Port:     bePort.l4Addr.Port,
   959  							},
   960  						},
   961, pod.netnsCookie,
   962  					}
   963  					if feM.feAddr.AddrCluster.Is4() && be.AddrCluster.Is4() {
   964  						if option.Config.EnableIPv4 {
   965  							bes4 = append(bes4, be)
   966  						}
   967  					} else if feM.feAddr.AddrCluster.Is6() && be.AddrCluster.Is6() {
   968  						if option.Config.EnableIPv6 {
   969  							bes6 = append(bes6, be)
   970  						}
   971  					}
   972  				}
   973  			}
   974  			if len(bes4) > 0 {
   975  				rpm.updateFrontendMapping(config, feM,, bes4)
   976  			} else if len(bes6) > 0 {
   977  				rpm.updateFrontendMapping(config, feM,, bes6)
   978  			}
   979  		}
   980  		if len(bes4) > 0 || len(bes6) > 0 {
   981  			upsertFes = append(upsertFes, feM)
   982  		}
   983  	}
   984  	for i := range upsertFes {
   985  		rpm.upsertPolicyMapping(config, upsertFes[i])
   986  	}
   987  }
   989  // updateFrontendMapping updates policy config internal state and updates
   990  // the policy frontend mapped backends.
   991  func (rpm *Manager) updateFrontendMapping(config *LRPConfig, frontendMapping *feMapping, podID podID, backends []backend) {
   992  	newFePods := make([]backend, 0, len(frontendMapping.podBackends)+len(backends))
   993  	updatePodBes := true
   994  	// Update the frontend mapped backends slice, keeping the order same.
   995  	for _, be := range frontendMapping.podBackends {
   996  		if be.podID == podID {
   997  			if updatePodBes {
   998  				updatePodBes = false
   999  				// Get the updated backends for the given pod.
  1000  				newFePods = append(newFePods, backends...)
  1001  			}
  1002  		} else {
  1003  			// Collect the unchanged backends for other pods.
  1004  			newFePods = append(newFePods, be)
  1005  		}
  1006  	}
  1007  	if updatePodBes {
  1008  		// New backend pod for the frontend
  1009  		newFePods = append(newFePods, backends...)
  1010  	}
  1011  	frontendMapping.podBackends = newFePods
  1013  	if podPolicies, ok := rpm.policyPods[podID]; ok {
  1014  		newPodPolicy := true
  1015  		for _, poID := range podPolicies {
  1016  			// Existing pod policy update
  1017  			if poID == {
  1018  				newPodPolicy = false
  1019  				break
  1020  			}
  1021  		}
  1022  		if newPodPolicy {
  1023  			// Pod selected by a new policy
  1024  			rpm.policyPods[podID] = append(rpm.policyPods[podID],
  1025  		}
  1026  	} else {
  1027  		// Pod selected by a policy for the first time
  1028  		pp := []policyID{}
  1029  		rpm.policyPods[podID] = pp
  1030  	}
  1031  }
  1033  func (rpm *Manager) getPodMetadata(pod *slimcorev1.Pod) *podMetadata {
  1034  	podIPs := k8sUtils.ValidIPs(pod.Status)
  1035  	if len(podIPs) == 0 {
  1036  		// IPs not available yet.
  1037  		return nil
  1038  	}
  1040  	namedPorts := make(serviceStore.PortConfiguration)
  1041  	for _, container := range pod.Spec.Containers {
  1042  		for _, port := range container.Ports {
  1043  			if port.Name == "" {
  1044  				continue
  1045  			}
  1046  			namedPorts[port.Name] = lb.NewL4Addr(lb.L4Type(port.Protocol),
  1047  				uint16(port.ContainerPort))
  1048  		}
  1049  	}
  1051  	return &podMetadata{
  1052  		ips:        podIPs,
  1053  		labels:     pod.GetLabels(),
  1054  		namedPorts: namedPorts,
  1055  		id: k8s.ServiceID{
  1056  			Name:      pod.GetName(),
  1057  			Namespace: pod.GetNamespace(),
  1058  		},
  1059  	}
  1060  }
  1062  func (rpm *Manager) GetLRPs() []*LRPConfig {
  1063  	rpm.mutex.Lock()
  1064  	defer rpm.mutex.Unlock()
  1066  	lrps := make([]*LRPConfig, 0, len(rpm.policyConfigs))
  1067  	for _, lrp := range rpm.policyConfigs {
  1068  		lrps = append(lrps, lrp)
  1069  	}
  1071  	return lrps
  1072  }