
     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     4  package service
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"net"
    10  	"net/netip"
    11  	"sync/atomic"
    13  	""
    14  	""
    16  	""
    17  	""
    18  	cmtypes ""
    19  	""
    20  	datapathOpt ""
    21  	""
    22  	datapathTypes ""
    23  	""
    24  	lb ""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	monitorAgent ""
    31  	monitorAPI ""
    32  	""
    33  	""
    34  	nodeTypes ""
    35  	""
    36  	""
    37  	""
    38  )
    40  // ErrLocalRedirectServiceExists represents an error when a Local redirect
    41  // service exists with the same Frontend.
    42  type ErrLocalRedirectServiceExists struct {
    43  	frontend lb.L3n4AddrID
    44  	name     lb.ServiceName
    45  }
    47  // NewErrLocalRedirectServiceExists returns a new ErrLocalRedirectServiceExists
    48  func NewErrLocalRedirectServiceExists(frontend lb.L3n4AddrID, name lb.ServiceName) error {
    49  	return &ErrLocalRedirectServiceExists{
    50  		frontend: frontend,
    51  		name:     name,
    52  	}
    53  }
    55  func (e ErrLocalRedirectServiceExists) Error() string {
    56  	return fmt.Sprintf("local-redirect service exists for "+
    57  		"frontend %v, skip update for svc %v", e.frontend,
    58  }
    60  func (e *ErrLocalRedirectServiceExists) Is(target error) bool {
    61  	t, ok := target.(*ErrLocalRedirectServiceExists)
    62  	if !ok {
    63  		return false
    64  	}
    65  	return e.frontend.DeepEqual(&t.frontend) && ==
    66  }
    68  // healthServer is used to manage HealthCheckNodePort listeners
    69  type healthServer interface {
    70  	UpsertService(svcID lb.ID, svcNS, svcName string, localEndpoints int, port uint16)
    71  	DeleteService(svcID lb.ID)
    72  }
    74  type svcInfo struct {
    75  	hash     string
    76  	frontend lb.L3n4AddrID
    77  	backends []*lb.Backend
    78  	// Hashed `backends`; pointing to the same objects.
    79  	backendByHash map[string]*lb.Backend
    81  	svcType                   lb.SVCType
    82  	svcExtTrafficPolicy       lb.SVCTrafficPolicy
    83  	svcIntTrafficPolicy       lb.SVCTrafficPolicy
    84  	svcNatPolicy              lb.SVCNatPolicy
    85  	sessionAffinity           bool
    86  	sessionAffinityTimeoutSec uint32
    87  	svcHealthCheckNodePort    uint16
    88  	healthcheckFrontendHash   string
    89  	svcName                   lb.ServiceName
    90  	loadBalancerSourceRanges  []*cidr.CIDR
    91  	l7LBProxyPort             uint16 // Non-zero for egress L7 LB services
    92  	LoopbackHostport          bool
    94  	restoredFromDatapath bool
    95  	// The hashes of the backends restored from the datapath and
    96  	// not yet heard about from the service cache.
    97  	restoredBackendHashes sets.Set[string]
    98  }
   100  func (svc *svcInfo) isL7LBService() bool {
   101  	return svc.l7LBProxyPort != 0
   102  }
   104  func (svc *svcInfo) deepCopyToLBSVC() *lb.SVC {
   105  	backends := make([]*lb.Backend, len(svc.backends))
   106  	for i, backend := range svc.backends {
   107  		backends[i] = backend.DeepCopy()
   108  	}
   109  	return &lb.SVC{
   110  		Frontend:            *svc.frontend.DeepCopy(),
   111  		Backends:            backends,
   112  		Type:                svc.svcType,
   113  		ExtTrafficPolicy:    svc.svcExtTrafficPolicy,
   114  		IntTrafficPolicy:    svc.svcIntTrafficPolicy,
   115  		NatPolicy:           svc.svcNatPolicy,
   116  		HealthCheckNodePort: svc.svcHealthCheckNodePort,
   117  		Name:                svc.svcName,
   118  		L7LBProxyPort:       svc.l7LBProxyPort,
   119  		LoopbackHostport:    svc.LoopbackHostport,
   120  	}
   121  }
   123  func (svc *svcInfo) isExtLocal() bool {
   124  	switch svc.svcType {
   125  	case lb.SVCTypeNodePort, lb.SVCTypeLoadBalancer, lb.SVCTypeExternalIPs:
   126  		return svc.svcExtTrafficPolicy == lb.SVCTrafficPolicyLocal
   127  	default:
   128  		return false
   129  	}
   130  }
   132  func (svc *svcInfo) isIntLocal() bool {
   133  	switch svc.svcType {
   134  	case lb.SVCTypeClusterIP, lb.SVCTypeNodePort, lb.SVCTypeLoadBalancer, lb.SVCTypeExternalIPs:
   135  		return svc.svcIntTrafficPolicy == lb.SVCTrafficPolicyLocal
   136  	default:
   137  		return false
   138  	}
   139  }
   141  func (svc *svcInfo) filterBackends(frontend lb.L3n4AddrID) bool {
   142  	switch svc.svcType {
   143  	case lb.SVCTypeLocalRedirect:
   144  		return true
   145  	default:
   146  		// When both traffic policies are Local, there is only the external scope, which
   147  		// should contain node-local backends only. Checking isExtLocal is still enough.
   148  		switch frontend.Scope {
   149  		case lb.ScopeExternal:
   150  			if svc.svcType == lb.SVCTypeClusterIP {
   151  				// ClusterIP doesn't support externalTrafficPolicy and has only the
   152  				// external scope, which contains only node-local backends when
   153  				// internalTrafficPolicy=Local.
   154  				return svc.isIntLocal()
   155  			}
   156  			return svc.isExtLocal()
   157  		case lb.ScopeInternal:
   158  			return svc.isIntLocal()
   159  		default:
   160  			return false
   161  		}
   162  	}
   163  }
   165  func (svc *svcInfo) useMaglev() bool {
   166  	if option.Config.NodePortAlg != option.NodePortAlgMaglev {
   167  		return false
   168  	}
   169  	// Provision the Maglev LUT for ClusterIP only if ExternalClusterIP is
   170  	// enabled because ClusterIP can also be accessed from outside with this
   171  	// setting. We don't do it unconditionally to avoid increasing memory
   172  	// footprint.
   173  	if svc.svcType == lb.SVCTypeClusterIP && !option.Config.ExternalClusterIP {
   174  		return false
   175  	}
   176  	// Wildcarded frontend is not exposed for external traffic.
   177  	if svc.svcType == lb.SVCTypeNodePort && isWildcardAddr(svc.frontend) {
   178  		return false
   179  	}
   180  	// Only provision the Maglev LUT for service types which are reachable
   181  	// from outside the node.
   182  	switch svc.svcType {
   183  	case lb.SVCTypeClusterIP,
   184  		lb.SVCTypeNodePort,
   185  		lb.SVCTypeLoadBalancer,
   186  		lb.SVCTypeHostPort,
   187  		lb.SVCTypeExternalIPs:
   188  		return true
   189  	}
   190  	return false
   191  }
   193  type L7LBInfo struct {
   194  	// Backend Sync registrations that are interested in Service backend changes
   195  	// to reflect this in a L7 loadbalancer (e.g. Envoy)
   196  	backendSyncRegistrations map[BackendSyncer]struct{}
   198  	// Name of the L7 LB resource (e.g. CEC) that needs this service to be redirected to an
   199  	// L7 Loadbalancer specified in that resource.
   200  	// Only one resource may do this for any given service.
   201  	ownerRef L7LBResourceName
   203  	// port number for L7 LB redirection. Can be zero if only backend sync
   204  	// has been requested.
   205  	proxyPort uint16
   207  	// (sub)set of service's frontend ports to be redirected. If empty, all frontend ports will be redirected.
   208  	ports []uint16
   209  }
   211  // isProtoAndPortMatch returns true if frontend has protocol TCP and its Port is in i.ports, or if
   212  // i.ports is empty.
   213  // 'ports' is typically short for no point optimizing the search.
   214  func (i *L7LBInfo) isProtoAndPortMatch(fe *lb.L4Addr) bool {
   215  	// L7 LB redirect is only supported for TCP frontends
   216  	// The below is to make sure that UDP and SCTP are not allowed instead of comparing with lb.TCP
   217  	// The reason is to avoid extra dependencies with ongoing work to differentiate protocols in datapath,
   218  	// which might add more values such as lb.Any, lb.None, etc.
   219  	if fe.Protocol == lb.UDP || fe.Protocol == lb.SCTP {
   220  		return false
   221  	}
   223  	// Empty 'ports' matches all ports
   224  	if len(i.ports) == 0 {
   225  		return true
   226  	}
   228  	for _, p := range i.ports {
   229  		if p == fe.Port {
   230  			return true
   231  		}
   232  	}
   233  	return false
   234  }
   236  type L7LBResourceName struct {
   237  	Namespace string
   238  	Name      string
   239  }
   241  func (svc *svcInfo) checkLBSourceRange() bool {
   242  	if option.Config.EnableSVCSourceRangeCheck {
   243  		return len(svc.loadBalancerSourceRanges) != 0
   244  	}
   246  	return false
   247  }
   249  // Service is a service handler. Its main responsibility is to reflect
   250  // service-related changes into BPF maps used by datapath BPF programs.
   251  // The changes can be triggered either by k8s_watcher or directly by
   252  // API calls to the /services endpoint.
   253  type Service struct {
   254  	lock.RWMutex
   256  	svcByHash map[string]*svcInfo
   257  	svcByID   map[lb.ID]*svcInfo
   259  	backendRefCount counter.Counter[string]
   260  	// only used to keep track of the existing hash->ID mapping,
   261  	// not for loadbalancing decisions.
   262  	backendByHash map[string]*lb.Backend
   264  	healthServer healthServer
   265  	monitorAgent monitorAgent.Agent
   267  	lbmap         datapathTypes.LBMap
   268  	lastUpdatedTs atomic.Value
   270  	l7lbSvcs map[lb.ServiceName]*L7LBInfo
   272  	backendConnectionHandler sockets.SocketDestroyer
   274  	backendDiscovery datapathTypes.NodeNeighbors
   275  }
   277  // newService creates a new instance of the service handler.
   278  func newService(monitorAgent monitorAgent.Agent, lbmap datapathTypes.LBMap, backendDiscoveryHandler datapathTypes.NodeNeighbors) *Service {
   279  	var localHealthServer healthServer
   280  	if option.Config.EnableHealthCheckNodePort {
   281  		localHealthServer = healthserver.New()
   282  	}
   284  	svc := &Service{
   285  		svcByHash:                map[string]*svcInfo{},
   286  		svcByID:                  map[lb.ID]*svcInfo{},
   287  		backendRefCount:          counter.Counter[string]{},
   288  		backendByHash:            map[string]*lb.Backend{},
   289  		monitorAgent:             monitorAgent,
   290  		healthServer:             localHealthServer,
   291  		lbmap:                    lbmap,
   292  		l7lbSvcs:                 map[lb.ServiceName]*L7LBInfo{},
   293  		backendConnectionHandler: backendConnectionHandler{},
   294  		backendDiscovery:         backendDiscoveryHandler,
   295  	}
   296  	svc.lastUpdatedTs.Store(time.Now())
   298  	return svc
   299  }
   301  // RegisterL7LBServiceRedirect makes the given service to be locally redirected to the
   302  // given proxy port.
   303  func (s *Service) RegisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName, proxyPort uint16, frontendPorts []uint16) error {
   304  	if proxyPort == 0 {
   305  		return errors.New("proxy port for L7 LB redirection must be nonzero")
   306  	}
   308  	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
   309  		log.WithFields(logrus.Fields{
   310  			logfields.ServiceName:       serviceName.Name,
   311  			logfields.ServiceNamespace:  serviceName.Namespace,
   312  			logfields.L7LBProxyPort:     proxyPort,
   313  			logfields.L7LBFrontendPorts: frontendPorts,
   314  		}).Debug("Registering service for L7 proxy port redirection")
   315  	}
   317  	s.Lock()
   318  	defer s.Unlock()
   320  	err := s.registerL7LBServiceRedirect(serviceName, resourceName, proxyPort, frontendPorts)
   321  	if err != nil {
   322  		return err
   323  	}
   325  	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
   326  }
   328  // 's' must be locked
   329  func (s *Service) registerL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName, proxyPort uint16, frontendPorts []uint16) error {
   330  	info := s.l7lbSvcs[serviceName]
   331  	if info == nil {
   332  		info = &L7LBInfo{}
   333  	}
   335  	// Only one CEC resource for a given service may request L7 LB redirection at a time.
   336  	empty := L7LBResourceName{}
   337  	if info.ownerRef != empty && info.ownerRef != resourceName {
   338  		return fmt.Errorf("Service %q already registered for L7 LB redirection via a proxy resource %q", serviceName, info.ownerRef)
   339  	}
   341  	info.ownerRef = resourceName
   342  	info.proxyPort = proxyPort
   344  	if len(frontendPorts) == 0 {
   345  		info.ports = nil
   346  	} else {
   347  		info.ports = make([]uint16, len(frontendPorts))
   348  		copy(info.ports, frontendPorts)
   349  	}
   351  	s.l7lbSvcs[serviceName] = info
   353  	return nil
   354  }
   356  // RegisterL7LBServiceBackendSync registers a BackendSync to be informed when the backends of a Service change.
   357  func (s *Service) RegisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) error {
   358  	if backendSyncRegistration == nil {
   359  		return nil
   360  	}
   362  	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
   363  		log.WithFields(logrus.Fields{
   364  			logfields.ServiceName:      serviceName.Name,
   365  			logfields.ServiceNamespace: serviceName.Namespace,
   366  			logfields.ProxyName:        backendSyncRegistration.ProxyName(),
   367  		}).Debug("Registering service backend sync for L7 loadbalancer")
   368  	}
   370  	s.Lock()
   371  	defer s.Unlock()
   372  	s.registerL7LBServiceBackendSync(serviceName, backendSyncRegistration)
   374  	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
   375  }
   377  // 's' must be locked
   378  func (s *Service) registerL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) {
   379  	info := s.l7lbSvcs[serviceName]
   380  	if info == nil {
   381  		info = &L7LBInfo{}
   382  	}
   384  	if info.backendSyncRegistrations == nil {
   385  		info.backendSyncRegistrations = make(map[BackendSyncer]struct{}, 1)
   386  	}
   387  	info.backendSyncRegistrations[backendSyncRegistration] = struct{}{}
   389  	s.l7lbSvcs[serviceName] = info
   390  }
   392  func (s *Service) DeregisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName) error {
   393  	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
   394  		log.WithFields(logrus.Fields{
   395  			logfields.ServiceName:      serviceName.Name,
   396  			logfields.ServiceNamespace: serviceName.Namespace,
   397  		}).Debug("Deregistering service from L7 load balancing")
   398  	}
   400  	s.Lock()
   401  	defer s.Unlock()
   403  	changed := s.deregisterL7LBServiceRedirect(serviceName, resourceName)
   405  	if !changed {
   406  		return nil
   407  	}
   409  	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
   410  }
   412  func (s *Service) deregisterL7LBServiceRedirect(serviceName lb.ServiceName, resourceName L7LBResourceName) bool {
   413  	info, found := s.l7lbSvcs[serviceName]
   414  	if !found {
   415  		return false
   416  	}
   418  	empty := L7LBResourceName{}
   420  	changed := false
   422  	if info.ownerRef == resourceName {
   423  		info.ownerRef = empty
   424  		info.proxyPort = 0
   425  		changed = true
   426  	}
   428  	if len(info.backendSyncRegistrations) == 0 && info.ownerRef == empty {
   429  		delete(s.l7lbSvcs, serviceName)
   430  		changed = true
   431  	}
   433  	return changed
   434  }
   436  func (s *Service) DeregisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) error {
   437  	if backendSyncRegistration == nil {
   438  		return nil
   439  	}
   441  	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
   442  		log.WithFields(logrus.Fields{
   443  			logfields.ServiceName:      serviceName.Name,
   444  			logfields.ServiceNamespace: serviceName.Namespace,
   445  			logfields.ProxyName:        backendSyncRegistration.ProxyName(),
   446  		}).Debug("Deregistering service backend sync for L7 loadbalancer")
   447  	}
   449  	s.Lock()
   450  	defer s.Unlock()
   451  	changed := s.deregisterL7LBServiceBackendSync(serviceName, backendSyncRegistration)
   453  	if !changed {
   454  		return nil
   455  	}
   457  	return s.reUpsertServicesByName(serviceName.Name, serviceName.Namespace)
   458  }
   460  func (s *Service) deregisterL7LBServiceBackendSync(serviceName lb.ServiceName, backendSyncRegistration BackendSyncer) bool {
   461  	info, found := s.l7lbSvcs[serviceName]
   462  	if !found {
   463  		return false
   464  	}
   466  	if info.backendSyncRegistrations == nil {
   467  		return false
   468  	}
   470  	if _, registered := info.backendSyncRegistrations[backendSyncRegistration]; !registered {
   471  		return false
   472  	}
   474  	delete(info.backendSyncRegistrations, backendSyncRegistration)
   476  	empty := L7LBResourceName{}
   477  	if len(info.backendSyncRegistrations) == 0 && info.ownerRef == empty {
   478  		delete(s.l7lbSvcs, serviceName)
   479  	}
   481  	return true
   482  }
   484  // BackendSyncer performs a synchronization of service backends to an
   485  // external loadbalancer (e.g. Envoy L7 Loadbalancer).
   486  type BackendSyncer interface {
   487  	// ProxyName returns a human readable name of the L7 Proxy that acts as
   488  	// // L7 loadbalancer.
   489  	ProxyName() string
   491  	// Sync triggers the actual synchronization and passes the information
   492  	// about the service that should be synchronized.
   493  	Sync(svc *lb.SVC) error
   494  }
   496  func (s *Service) GetLastUpdatedTs() time.Time {
   497  	if val := s.lastUpdatedTs.Load(); val != nil {
   498  		ts, ok := val.(time.Time)
   499  		if ok {
   500  			return ts
   501  		}
   502  	}
   503  	return time.Now()
   504  }
   506  func (s *Service) GetCurrentTs() time.Time {
   507  	return time.Now()
   508  }
   510  func (s *Service) populateBackendMapV3FromV2(ipv4, ipv6 bool) error {
   511  	const (
   512  		v4 = "ipv4"
   513  		v6 = "ipv6"
   514  	)
   516  	enabled := map[string]bool{v4: ipv4, v6: ipv6}
   518  	for v, e := range enabled {
   519  		if !e {
   520  			continue
   521  		}
   523  		var (
   524  			err          error
   525  			v2Map        *bpf.Map
   526  			v3Map        *bpf.Map
   527  			v3BackendVal lbmap.BackendValue
   528  		)
   530  		copyBackendEntries := func(key bpf.MapKey, value bpf.MapValue) {
   531  			if v == v4 {
   532  				v3Map = lbmap.Backend4MapV3
   533  				v1BackendVal := value.(*lbmap.Backend4Value)
   534  				addrCluster := cmtypes.AddrClusterFrom(v1BackendVal.Address.Addr(), 0)
   535  				v3BackendVal, err = lbmap.NewBackend4ValueV3(
   536  					addrCluster,
   537  					v1BackendVal.Port,
   538  					v1BackendVal.Proto,
   539  					lb.GetBackendStateFromFlags(v1BackendVal.Flags),
   540  					0,
   541  				)
   542  				if err != nil {
   543  					log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Debug("Error creating map value")
   544  					return
   545  				}
   546  			} else {
   547  				v3Map = lbmap.Backend6MapV3
   548  				v1BackendVal := value.(*lbmap.Backend6Value)
   549  				addrCluster := cmtypes.AddrClusterFrom(v1BackendVal.Address.Addr(), 0)
   550  				v3BackendVal, err = lbmap.NewBackend6ValueV3(
   551  					addrCluster,
   552  					v1BackendVal.Port,
   553  					v1BackendVal.Proto,
   554  					lb.GetBackendStateFromFlags(v1BackendVal.Flags),
   555  					0,
   556  				)
   557  				if err != nil {
   558  					log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Debug("Error creating map value")
   559  					return
   560  				}
   561  			}
   563  			err := v3Map.Update(key, v3BackendVal)
   564  			if err != nil {
   565  				log.WithError(err).WithField(logfields.BPFMapName, v3Map.Name()).Warn("Error updating map")
   566  			}
   567  		}
   569  		if v == v4 {
   570  			v2Map = lbmap.Backend4MapV2
   571  		} else {
   572  			v2Map = lbmap.Backend6MapV2
   573  		}
   575  		err = v2Map.DumpWithCallback(copyBackendEntries)
   576  		if err != nil {
   577  			return fmt.Errorf("unable to populate %s: %w", v2Map.Name(), err)
   578  		}
   580  		// V2 backend map will be removed from bpffs at this point,
   581  		// the map will be actually removed once the last program
   582  		// referencing it has been removed.
   583  		err = v2Map.Close()
   584  		if err != nil {
   585  			log.WithError(err).WithField(logfields.BPFMapName, v2Map.Name()).Warn("Error closing map")
   586  		}
   588  		err = v2Map.Unpin()
   589  		if err != nil {
   590  			log.WithError(err).WithField(logfields.BPFMapName, v2Map.Name()).Warn("Error unpinning map")
   591  		}
   593  	}
   594  	return nil
   595  }
   597  // InitMaps opens or creates BPF maps used by services.
   598  //
   599  // If restore is set to false, entries of the maps are removed.
   600  func (s *Service) InitMaps(ipv6, ipv4, sockMaps, restore bool) error {
   601  	s.Lock()
   602  	defer s.Unlock()
   604  	var (
   605  		v2BackendMapExistsV4 bool
   606  		v2BackendMapExistsV6 bool
   607  	)
   609  	toOpen := []*bpf.Map{}
   610  	toDelete := []*bpf.Map{}
   611  	if ipv6 {
   612  		toOpen = append(toOpen, lbmap.Service6MapV2, lbmap.Backend6MapV3, lbmap.RevNat6Map)
   613  		if !restore {
   614  			toDelete = append(toDelete, lbmap.Service6MapV2, lbmap.Backend6MapV3, lbmap.RevNat6Map)
   615  		}
   616  		if sockMaps {
   617  			if err := lbmap.CreateSockRevNat6Map(); err != nil {
   618  				return err
   619  			}
   620  		}
   621  		v2BackendMapExistsV6 = lbmap.Backend6MapV2.Open() == nil
   622  	}
   623  	if ipv4 {
   624  		toOpen = append(toOpen, lbmap.Service4MapV2, lbmap.Backend4MapV3, lbmap.RevNat4Map)
   625  		if !restore {
   626  			toDelete = append(toDelete, lbmap.Service4MapV2, lbmap.Backend4MapV3, lbmap.RevNat4Map)
   627  		}
   628  		if sockMaps {
   629  			if err := lbmap.CreateSockRevNat4Map(); err != nil {
   630  				return err
   631  			}
   632  		}
   633  		v2BackendMapExistsV4 = lbmap.Backend4MapV2.Open() == nil
   634  	}
   636  	for _, m := range toOpen {
   637  		if err := m.OpenOrCreate(); err != nil {
   638  			return err
   639  		}
   640  	}
   641  	for _, m := range toDelete {
   642  		if err := m.DeleteAll(); err != nil {
   643  			return err
   644  		}
   645  	}
   647  	if v2BackendMapExistsV4 || v2BackendMapExistsV6 {
   648  		log.Info("Backend map v2 exists. Migrating entries to backend map v3.")
   649  		if err := s.populateBackendMapV3FromV2(v2BackendMapExistsV4, v2BackendMapExistsV6); err != nil {
   650  			log.WithError(err).Warn("Error populating V3 map from V2 map, might interrupt existing connections during upgrade")
   651  		}
   652  	}
   654  	return nil
   655  }
   657  // UpsertService inserts or updates the given service.
   658  //
   659  // The first return value is true if the service hasn't existed before.
   660  func (s *Service) UpsertService(params *lb.SVC) (bool, lb.ID, error) {
   661  	s.Lock()
   662  	defer s.Unlock()
   663  	return s.upsertService(params)
   664  }
   666  // reUpsertServicesByName upserts a service again to update it's internal state after
   667  // changes for L7 service redirection.
   668  // Write lock on 's' must be held.
   669  func (s *Service) reUpsertServicesByName(name, namespace string) error {
   670  	for _, svc := range s.svcByHash {
   671  		if svc.svcName.Name == name && svc.svcName.Namespace == namespace {
   672  			svcCopy := svc.deepCopyToLBSVC()
   673  			if _, _, err := s.upsertService(svcCopy); err != nil {
   674  				return fmt.Errorf("error while updating service in LB map: %w", err)
   675  			}
   676  		}
   677  	}
   678  	return nil
   679  }
   681  func (s *Service) upsertService(params *lb.SVC) (bool, lb.ID, error) {
   682  	empty := L7LBResourceName{}
   684  	// Set L7 LB for this service if registered.
   685  	l7lbInfo, exists := s.l7lbSvcs[params.Name]
   686  	if exists && l7lbInfo.ownerRef != empty && l7lbInfo.isProtoAndPortMatch(&params.Frontend.L4Addr) {
   687  		params.L7LBProxyPort = l7lbInfo.proxyPort
   688  	} else {
   689  		params.L7LBProxyPort = 0
   690  	}
   692  	// L7 LB is sharing a C union in the datapath, disable session
   693  	// affinity if L7 LB is configured for this service.
   694  	if params.L7LBProxyPort != 0 {
   695  		params.SessionAffinity = false
   696  		params.SessionAffinityTimeoutSec = 0
   697  	}
   699  	// Implement a "lazy load" function for the scoped logger, so the expensive
   700  	// call to 'WithFields' is only done if needed.
   701  	debugLogsEnabled := logging.CanLogAt(log.Logger, logrus.DebugLevel)
   702  	scopedLog := log
   703  	scopedLogPopulated := false
   704  	getScopedLog := func() *logrus.Entry {
   705  		if !scopedLogPopulated {
   706  			scopedLog = scopedLog.WithFields(logrus.Fields{
   707  				logfields.ServiceIP: params.Frontend.L3n4Addr,
   708  				logfields.Backends:  params.Backends,
   710  				logfields.ServiceType:                params.Type,
   711  				logfields.ServiceExtTrafficPolicy:    params.ExtTrafficPolicy,
   712  				logfields.ServiceIntTrafficPolicy:    params.IntTrafficPolicy,
   713  				logfields.ServiceHealthCheckNodePort: params.HealthCheckNodePort,
   714  				logfields.ServiceName:                params.Name.Name,
   715  				logfields.ServiceNamespace:           params.Name.Namespace,
   717  				logfields.SessionAffinity:        params.SessionAffinity,
   718  				logfields.SessionAffinityTimeout: params.SessionAffinityTimeoutSec,
   720  				logfields.LoadBalancerSourceRanges: params.LoadBalancerSourceRanges,
   722  				logfields.L7LBProxyPort: params.L7LBProxyPort,
   723  			})
   725  			scopedLogPopulated = true
   726  		}
   727  		return scopedLog
   728  	}
   730  	if debugLogsEnabled {
   731  		getScopedLog().Debug("Upserting service")
   732  	}
   734  	if !option.Config.EnableSVCSourceRangeCheck &&
   735  		len(params.LoadBalancerSourceRanges) != 0 {
   736  		getScopedLog().Warnf("--%s is disabled, ignoring loadBalancerSourceRanges",
   737  			option.EnableSVCSourceRangeCheck)
   738  	}
   740  	// Backends must either be the same IP proto as the frontend, or can be of
   741  	// a different proto for NAT46/64. However, backends must be consistently
   742  	// either v4 or v6, but not a mix.
   743  	v4Seen := 0
   744  	v6Seen := 0
   745  	for _, b := range params.Backends {
   746  		if b.L3n4Addr.IsIPv6() {
   747  			v6Seen++
   748  		} else {
   749  			v4Seen++
   750  		}
   751  	}
   752  	if v4Seen > 0 && v6Seen > 0 {
   753  		err := fmt.Errorf("Unable to upsert service %s with a mixed set of IPv4 and IPv6 backends", params.Frontend.L3n4Addr.String())
   754  		return false, lb.ID(0), err
   755  	}
   756  	v6Svc := params.Frontend.IsIPv6()
   757  	if (v6Svc || v6Seen > 0) && !option.Config.EnableIPv6 {
   758  		err := fmt.Errorf("Unable to upsert service %s as IPv6 is disabled", params.Frontend.L3n4Addr.String())
   759  		return false, lb.ID(0), err
   760  	}
   761  	if (!v6Svc || v4Seen > 0) && !option.Config.EnableIPv4 {
   762  		err := fmt.Errorf("Unable to upsert service %s as IPv4 is disabled", params.Frontend.L3n4Addr.String())
   763  		return false, lb.ID(0), err
   764  	}
   765  	params.NatPolicy = lb.SVCNatPolicyNone
   766  	if v6Svc && v4Seen > 0 {
   767  		params.NatPolicy = lb.SVCNatPolicyNat64
   768  	} else if !v6Svc && v6Seen > 0 {
   769  		params.NatPolicy = lb.SVCNatPolicyNat46
   770  	}
   771  	if params.NatPolicy != lb.SVCNatPolicyNone && !option.Config.NodePortNat46X64 {
   772  		err := fmt.Errorf("Unable to upsert service %s as NAT46/64 is disabled", params.Frontend.L3n4Addr.String())
   773  		return false, lb.ID(0), err
   774  	}
   776  	// If needed, create svcInfo and allocate service ID
   777  	svc, new, prevSessionAffinity, prevLoadBalancerSourceRanges, err := s.createSVCInfoIfNotExist(params)
   778  	if err != nil {
   779  		return false, lb.ID(0), err
   780  	}
   782  	// TODO(brb) defer ServiceID release after we have a lbmap "rollback"
   783  	// If getScopedLog() has not been called, this field will still be included
   784  	// from this point on in the function.
   785  	scopedLog = scopedLog.WithField(logfields.ServiceID, svc.frontend.ID)
   786  	if debugLogsEnabled {
   787  		getScopedLog().Debug("Acquired service ID")
   788  	}
   790  	filterBackends := svc.filterBackends(params.Frontend)
   791  	prevBackendCount := len(svc.backends)
   793  	backendsCopy := []*lb.Backend{}
   794  	for _, b := range params.Backends {
   795  		// Local redirect services or services with trafficPolicy=Local may
   796  		// only use node-local backends for external scope. We implement this by
   797  		// filtering out all backend IPs which are not a local endpoint.
   798  		if filterBackends && len(b.NodeName) > 0 && b.NodeName != nodeTypes.GetName() {
   799  			continue
   800  		}
   801  		backendsCopy = append(backendsCopy, b.DeepCopy())
   802  	}
   804  	// Update backends cache and allocate/release backend IDs
   805  	newBackends, obsoleteBackends, obsoleteSVCBackendIDs, err := s.updateBackendsCacheLocked(svc, backendsCopy)
   806  	if err != nil {
   807  		return false, lb.ID(0), err
   808  	}
   810  	if l7lbInfo != nil {
   811  		for bs := range l7lbInfo.backendSyncRegistrations {
   812  			svcCopy := svc.deepCopyToLBSVC()
   813  			if err := bs.Sync(svcCopy); err != nil {
   814  				return false, lb.ID(0), fmt.Errorf("failed to sync L7 LB backends (proxy: %s): %w", bs.ProxyName(), err)
   815  			}
   816  		}
   817  	}
   819  	// Update lbmaps (BPF service maps)
   820  	if err = s.upsertServiceIntoLBMaps(svc, svc.isExtLocal(), svc.isIntLocal(), prevBackendCount,
   821  		newBackends, obsoleteBackends, prevSessionAffinity, prevLoadBalancerSourceRanges,
   822  		obsoleteSVCBackendIDs, getScopedLog, debugLogsEnabled); err != nil {
   823  		return false, lb.ID(0), err
   824  	}
   826  	// Update managed neighbor entries of the LB
   827  	if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly {
   828  		s.upsertBackendNeighbors(newBackends, obsoleteBackends)
   829  	}
   831  	// Only add a HealthCheckNodePort server if this is a service which may
   832  	// only contain local backends (i.e. it has externalTrafficPolicy=Local)
   833  	if option.Config.EnableHealthCheckNodePort {
   834  		if svc.isExtLocal() && filterBackends && svc.svcHealthCheckNodePort > 0 {
   835  			// HealthCheckNodePort is used by external systems to poll the state of the Service,
   836  			// it should never take into consideration Terminating backends, even when there are only
   837  			// Terminating backends.
   838  			//
   839  			// There is one special case is L7 proxy service, which never have any
   840  			// backends because the traffic will be redirected.
   841  			activeBackends := 0
   842  			if l7lbInfo != nil {
   843  				// Set this to 1 because Envoy will be running in this case.
   844  				getScopedLog().WithField(logfields.ServiceHealthCheckNodePort, svc.svcHealthCheckNodePort).
   845  					Debug("L7 service with HealthcheckNodePort enabled")
   846  				activeBackends = 1
   847  			} else {
   848  				for _, b := range backendsCopy {
   849  					if b.State == lb.BackendStateActive {
   850  						activeBackends++
   851  					}
   852  				}
   853  			}
   854  			s.healthServer.UpsertService(svc.frontend.ID, svc.svcName.Namespace, svc.svcName.Name,
   855  				activeBackends, svc.svcHealthCheckNodePort)
   857  			if err = s.upsertNodePortHealthService(svc, &nodeMetaCollector{}); err != nil {
   858  				return false, lb.ID(0), fmt.Errorf("upserting NodePort health service failed: %w", err)
   859  			}
   861  		} else if svc.svcHealthCheckNodePort == 0 {
   862  			// Remove the health check server in case this service used to have
   863  			// externalTrafficPolicy=Local with HealthCheckNodePort in the previous
   864  			// version, but not anymore.
   865  			s.healthServer.DeleteService(lb.ID(svc.frontend.ID))
   867  			if svc.healthcheckFrontendHash != "" {
   868  				healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
   869  				if healthSvc != nil {
   870  					s.deleteServiceLocked(healthSvc)
   871  				}
   872  				svc.healthcheckFrontendHash = ""
   873  			}
   874  		}
   875  	}
   877  	if new {
   878  		metrics.ServicesEventsCount.WithLabelValues("add").Inc()
   879  	} else {
   880  		metrics.ServicesEventsCount.WithLabelValues("update").Inc()
   881  	}
   883  	s.notifyMonitorServiceUpsert(svc.frontend, svc.backends,
   884  		svc.svcType, svc.svcExtTrafficPolicy, svc.svcIntTrafficPolicy, svc.svcName.Name, svc.svcName.Namespace)
   885  	return new, lb.ID(svc.frontend.ID), nil
   886  }
// NodeMetaCollector provides the IPv4 and IPv6 addresses of a node. In this
// file it is consumed by upsertNodePortHealthService, which uses the returned
// address as the backend of the health-check service.
type NodeMetaCollector interface {
	// GetIPv4 returns the IPv4 address to use for the node.
	GetIPv4() net.IP
	// GetIPv6 returns the IPv6 address to use for the node.
	GetIPv6() net.IP
}
// nodeMetaCollector is the NodeMetaCollector implementation whose methods
// delegate to the node package.
type nodeMetaCollector struct{}
   895  func (n *nodeMetaCollector) GetIPv4() net.IP {
   896  	return node.GetIPv4()
   897  }
   899  func (n *nodeMetaCollector) GetIPv6() net.IP {
   900  	return node.GetIPv6()
   901  }
   903  // upsertNodePortHealthService makes the HealthCheckNodePort available to the external IP of the service
   904  func (s *Service) upsertNodePortHealthService(svc *svcInfo, nodeMeta NodeMetaCollector) error {
   905  	// For any service that has a healthCheckNodePort, we create a healthCheck service
   906  	// The service that is created does not need an another healthCheck service.
   907  	// The easiest way end that loop is to check for the HealthCheckNodePort
   908  	// Also, without a healthCheckNodePort, we don't need to create a healthCheck service
   909  	if !option.Config.EnableHealthCheckLoadBalancerIP || svc.svcType != lb.SVCTypeLoadBalancer || svc.svcHealthCheckNodePort == 0 {
   910  		if svc.healthcheckFrontendHash == "" {
   911  			return nil
   912  		}
   914  		healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
   915  		if healthSvc != nil {
   916  			s.deleteServiceLocked(healthSvc)
   917  		}
   918  		svc.healthcheckFrontendHash = ""
   920  		return nil
   921  	}
   923  	healthCheckSvcName := svc.svcName
   924  	healthCheckSvcName.Name = svc.svcName.Name + "-healthCheck"
   926  	healthCheckFrontend := *lb.NewL3n4AddrID(
   927  		lb.TCP,
   928  		svc.frontend.AddrCluster,
   929  		svc.svcHealthCheckNodePort,
   930  		lb.ScopeExternal,
   931  		0,
   932  	)
   934  	if svc.healthcheckFrontendHash != "" && svc.healthcheckFrontendHash != healthCheckFrontend.Hash() {
   935  		healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
   936  		if healthSvc != nil {
   937  			s.deleteServiceLocked(healthSvc)
   938  		}
   939  	}
   941  	var ip netip.Addr
   942  	var ok bool
   943  	if svc.frontend.AddrCluster.Is4() {
   944  		ip, ok = netip.AddrFromSlice(nodeMeta.GetIPv4().To4())
   945  	} else {
   946  		ip, ok = netip.AddrFromSlice(nodeMeta.GetIPv6())
   947  	}
   949  	if !ok {
   950  		return fmt.Errorf("failed to parse node IP")
   951  	}
   953  	clusterAddr := cmtypes.AddrClusterFrom(ip, option.Config.ClusterID)
   955  	healthCheckBackends := []*lb.Backend{
   956  		{
   957  			L3n4Addr: *lb.NewL3n4Addr(lb.TCP, clusterAddr, svc.svcHealthCheckNodePort, lb.ScopeInternal),
   958  			State:    lb.BackendStateActive,
   959  			NodeName: nodeTypes.GetName(),
   960  		},
   961  	}
   962  	// Create a new service with the healthcheck frontend and healthcheck backend
   963  	healthCheckSvc := &lb.SVC{
   964  		Name:             healthCheckSvcName,
   965  		Type:             svc.svcType,
   966  		Frontend:         healthCheckFrontend,
   967  		ExtTrafficPolicy: lb.SVCTrafficPolicyLocal,
   968  		IntTrafficPolicy: lb.SVCTrafficPolicyLocal,
   969  		Backends:         healthCheckBackends,
   970  		LoopbackHostport: true,
   971  	}
   973  	_, _, err := s.upsertService(healthCheckSvc)
   974  	if err != nil {
   975  		return err
   976  	}
   977  	svc.healthcheckFrontendHash = healthCheckFrontend.Hash()
   979  	log.WithFields(logrus.Fields{
   980  		logfields.ServiceName:      svc.svcName.Name,
   981  		logfields.ServiceNamespace: svc.svcName.Namespace,
   982  	}).Debug("Created healthcheck service for frontend")
   984  	return nil
   985  }
   987  // UpdateBackendsState updates all the service(s) with the updated state of
   988  // the given backends. It also persists the updated backend states to the BPF maps.
   989  //
   990  // Backend state transitions are validated before processing.
   991  //
   992  // In case of duplicated backends in the list, the state will be updated to the
   993  // last duplicate entry.
   994  func (s *Service) UpdateBackendsState(backends []*lb.Backend) error {
   995  	if len(backends) == 0 {
   996  		return nil
   997  	}
   999  	if logging.CanLogAt(log.Logger, logrus.DebugLevel) {
  1000  		for _, b := range backends {
  1001  			log.WithFields(logrus.Fields{
  1002  				logfields.L3n4Addr:         b.L3n4Addr.String(),
  1003  				logfields.BackendState:     b.State,
  1004  				logfields.BackendPreferred: b.Preferred,
  1005  			}).Debug("Update backend states")
  1006  		}
  1007  	}
  1009  	var (
  1010  		errs            error
  1011  		updatedBackends []*lb.Backend
  1012  	)
  1013  	updateSvcs := make(map[lb.ID]*datapathTypes.UpsertServiceParams)
  1015  	s.Lock()
  1016  	defer s.Unlock()
  1017  	for _, updatedB := range backends {
  1018  		hash := updatedB.L3n4Addr.Hash()
  1020  		be, exists := s.backendByHash[hash]
  1021  		if !exists {
  1022  			// Cilium service API and Kubernetes events are asynchronous, so it's
  1023  			// possible to receive an API call for a backend that's already deleted.
  1024  			continue
  1025  		}
  1026  		if !lb.IsValidStateTransition(be.State, updatedB.State) {
  1027  			currentState, _ := be.State.String()
  1028  			newState, _ := updatedB.State.String()
  1029  			errs = errors.Join(errs,
  1030  				fmt.Errorf("invalid state transition for backend[%s] (%s) -> (%s)",
  1031  					updatedB.String(), currentState, newState),
  1032  			)
  1033  			continue
  1034  		}
  1035  		be.State = updatedB.State
  1036  		be.Preferred = updatedB.Preferred
  1038  		for id, info := range s.svcByID {
  1039  			var p *datapathTypes.UpsertServiceParams
  1040  			for i, b := range info.backends {
  1041  				if b.L3n4Addr.String() != updatedB.L3n4Addr.String() {
  1042  					continue
  1043  				}
  1044  				if b.State == updatedB.State {
  1045  					break
  1046  				}
  1047  				info.backends[i].State = updatedB.State
  1048  				info.backends[i].Preferred = updatedB.Preferred
  1049  				found := false
  1051  				if p, found = updateSvcs[id]; !found {
  1052  					p = &datapathTypes.UpsertServiceParams{
  1053  						ID:                        uint16(id),
  1054  						IP:                        info.frontend.L3n4Addr.AddrCluster.AsNetIP(),
  1055  						Port:                      info.frontend.L3n4Addr.L4Addr.Port,
  1056  						PrevBackendsCount:         len(info.backends),
  1057  						IPv6:                      info.frontend.IsIPv6(),
  1058  						Type:                      info.svcType,
  1059  						ExtLocal:                  info.isExtLocal(),
  1060  						IntLocal:                  info.isIntLocal(),
  1061  						Scope:                     info.frontend.L3n4Addr.Scope,
  1062  						SessionAffinity:           info.sessionAffinity,
  1063  						SessionAffinityTimeoutSec: info.sessionAffinityTimeoutSec,
  1064  						CheckSourceRange:          info.checkLBSourceRange(),
  1065  						UseMaglev:                 info.useMaglev(),
  1066  						Name:                      info.svcName,
  1067  						LoopbackHostport:          info.LoopbackHostport,
  1068  					}
  1069  				}
  1070  				p.PreferredBackends, p.ActiveBackends, p.NonActiveBackends = segregateBackends(info.backends)
  1071  				updateSvcs[id] = p
  1072  				log.WithFields(logrus.Fields{
  1073  					logfields.ServiceID:        p.ID,
  1074  					logfields.BackendID:        b.ID,
  1075  					logfields.L3n4Addr:         b.L3n4Addr.String(),
  1076  					logfields.BackendState:     b.State,
  1077  					logfields.BackendPreferred: b.Preferred,
  1078  				}).Info("Persisting service with backend state update")
  1079  			}
  1080  			s.svcByID[id] = info
  1081  			s.svcByHash[info.frontend.Hash()] = info
  1082  		}
  1083  		updatedBackends = append(updatedBackends, be)
  1084  	}
  1086  	// Update the persisted backend state in BPF maps.
  1087  	for _, b := range updatedBackends {
  1088  		log.WithFields(logrus.Fields{
  1089  			logfields.BackendID:        b.ID,
  1090  			logfields.L3n4Addr:         b.L3n4Addr.String(),
  1091  			logfields.BackendState:     b.State,
  1092  			logfields.BackendPreferred: b.Preferred,
  1093  		}).Info("Persisting updated backend state for backend")
  1094  		if err := s.lbmap.UpdateBackendWithState(b); err != nil {
  1095  			errs = errors.Join(errs, fmt.Errorf("failed to update backend %+v: %w", b, err))
  1096  		}
  1097  	}
  1099  	for i := range updateSvcs {
  1100  		errs = errors.Join(errs, s.lbmap.UpsertService(updateSvcs[i]))
  1101  	}
  1102  	return errs
  1103  }
  1105  // DeleteServiceByID removes a service identified by the given ID.
  1106  func (s *Service) DeleteServiceByID(id lb.ServiceID) (bool, error) {
  1107  	s.Lock()
  1108  	defer s.Unlock()
  1110  	if svc, found := s.svcByID[lb.ID(id)]; found {
  1111  		return true, s.deleteServiceLocked(svc)
  1112  	}
  1114  	return false, nil
  1115  }
  1117  // DeleteService removes the given service.
  1118  func (s *Service) DeleteService(frontend lb.L3n4Addr) (bool, error) {
  1119  	s.Lock()
  1120  	defer s.Unlock()
  1122  	if svc, found := s.svcByHash[frontend.Hash()]; found {
  1123  		return true, s.deleteServiceLocked(svc)
  1124  	}
  1126  	return false, nil
  1127  }
  1129  // GetDeepCopyServiceByID returns a deep-copy of a service identified with
  1130  // the given ID.
  1131  //
  1132  // If a service cannot be found, returns false.
  1133  func (s *Service) GetDeepCopyServiceByID(id lb.ServiceID) (*lb.SVC, bool) {
  1134  	s.RLock()
  1135  	defer s.RUnlock()
  1137  	svc, found := s.svcByID[lb.ID(id)]
  1138  	if !found {
  1139  		return nil, false
  1140  	}
  1142  	return svc.deepCopyToLBSVC(), true
  1143  }
  1145  // GetDeepCopyServices returns a deep-copy of all installed services.
  1146  func (s *Service) GetDeepCopyServices() []*lb.SVC {
  1147  	s.RLock()
  1148  	defer s.RUnlock()
  1150  	svcs := make([]*lb.SVC, 0, len(s.svcByHash))
  1151  	for _, svc := range s.svcByHash {
  1152  		svcs = append(svcs, svc.deepCopyToLBSVC())
  1153  	}
  1155  	return svcs
  1156  }
  1158  // GetDeepCopyServiceByFrontend returns a deep-copy of the service that matches the Frontend address.
  1159  func (s *Service) GetDeepCopyServiceByFrontend(frontend lb.L3n4Addr) (*lb.SVC, bool) {
  1160  	s.RLock()
  1161  	defer s.RUnlock()
  1163  	if svc, found := s.svcByHash[frontend.Hash()]; found {
  1164  		return svc.deepCopyToLBSVC(), true
  1165  	}
  1167  	return nil, false
  1168  }
  1170  // RestoreServices restores services from BPF maps.
  1171  //
  1172  // It first restores all the service entries, followed by backend entries.
  1173  // In the process, it deletes any duplicate backend entries that were leaked, and
  1174  // are not referenced by any service entries.
  1175  //
  1176  // The method should be called once before establishing a connectivity
  1177  // to kube-apiserver.
  1178  func (s *Service) RestoreServices() error {
  1179  	s.Lock()
  1180  	defer s.Unlock()
  1181  	backendsById := make(map[lb.BackendID]struct{})
  1183  	var errs error
  1184  	// Restore service cache from BPF maps
  1185  	if err := s.restoreServicesLocked(backendsById); err != nil {
  1186  		errs = errors.Join(errs, fmt.Errorf("error while restoring services: %w", err))
  1187  	}
  1189  	// Restore backend IDs
  1190  	if err := s.restoreBackendsLocked(backendsById); err != nil {
  1191  		errs = errors.Join(errs, fmt.Errorf("error while restoring backends: %w", err))
  1192  	}
  1194  	// Remove LB source ranges for no longer existing services
  1195  	if option.Config.EnableSVCSourceRangeCheck {
  1196  		errs = errors.Join(errs, s.restoreAndDeleteOrphanSourceRanges())
  1197  	}
  1198  	return errs
  1199  }
  1201  // deleteOrphanAffinityMatchesLocked removes affinity matches which point to
  1202  // non-existent svc ID and backend ID tuples.
  1203  func (s *Service) deleteOrphanAffinityMatchesLocked() error {
  1204  	matches, err := s.lbmap.DumpAffinityMatches()
  1205  	if err != nil {
  1206  		return err
  1207  	}
  1209  	toRemove := map[lb.ID][]lb.BackendID{}
  1211  	local := make(map[lb.ID]map[lb.BackendID]struct{}, len(s.svcByID))
  1212  	for id, svc := range s.svcByID {
  1213  		if !svc.sessionAffinity {
  1214  			continue
  1215  		}
  1216  		local[id] = make(map[lb.BackendID]struct{}, len(svc.backends))
  1217  		for _, backend := range svc.backends {
  1218  			local[id][backend.ID] = struct{}{}
  1219  		}
  1220  	}
  1222  	for svcID, backendIDs := range matches {
  1223  		for bID := range backendIDs {
  1224  			found := false
  1225  			if _, ok := local[lb.ID(svcID)]; ok {
  1226  				if _, ok := local[lb.ID(svcID)][lb.BackendID(bID)]; ok {
  1227  					found = true
  1228  				}
  1229  			}
  1230  			if !found {
  1231  				toRemove[lb.ID(svcID)] = append(toRemove[lb.ID(svcID)], lb.BackendID(bID))
  1232  			}
  1233  		}
  1234  	}
  1236  	for svcID, backendIDs := range toRemove {
  1237  		s.deleteBackendsFromAffinityMatchMap(svcID, backendIDs)
  1238  	}
  1240  	return nil
  1241  }
  1243  func (s *Service) restoreAndDeleteOrphanSourceRanges() error {
  1244  	opts := []bool{}
  1245  	if option.Config.EnableIPv4 {
  1246  		opts = append(opts, false)
  1247  	}
  1248  	if option.Config.EnableIPv6 {
  1249  		opts = append(opts, true)
  1250  	}
  1252  	for _, ipv6 := range opts {
  1253  		srcRangesBySvcID, err := s.lbmap.DumpSourceRanges(ipv6)
  1254  		if err != nil {
  1255  			return err
  1256  		}
  1257  		for svcID, srcRanges := range srcRangesBySvcID {
  1258  			svc, found := s.svcByID[lb.ID(svcID)]
  1259  			if !found {
  1260  				// Delete ranges
  1261  				if err := s.lbmap.UpdateSourceRanges(svcID, srcRanges, nil, ipv6); err != nil {
  1262  					return err
  1263  				}
  1264  			} else {
  1265  				svc.loadBalancerSourceRanges = srcRanges
  1266  			}
  1267  		}
  1268  	}
  1270  	return nil
  1271  }
  1273  // SyncWithK8sFinished removes services which we haven't heard about during
  1274  // a sync period of cilium-agent's k8s service cache.
  1275  //
  1276  // The removal is based on an assumption that during the sync period
  1277  // UpsertService() is going to be called for each alive service.
  1278  //
  1279  // Additionally, it returns a list of services which are associated with
  1280  // stale backends, and which shall be refreshed. Stale services shall be
  1281  // refreshed regardless of whether an error is also returned or not.
  1282  //
  1283  // The localOnly flag allows to perform a two pass removal, handling local
  1284  // services first, and processing global ones only after full synchronization
  1285  // with all remote clusters.
  1286  func (s *Service) SyncWithK8sFinished(localOnly bool, localServices sets.Set[k8s.ServiceID]) (stale []k8s.ServiceID, err error) {
  1287  	s.Lock()
  1288  	defer s.Unlock()
  1290  	for _, svc := range s.svcByHash {
  1291  		svcID := k8s.ServiceID{
  1292  			Cluster:   svc.svcName.Cluster,
  1293  			Namespace: svc.svcName.Namespace,
  1294  			Name:      svc.svcName.Name,
  1295  		}
  1297  		// Skip processing global services when the localOnly flag is set.
  1298  		if localOnly && !localServices.Has(svcID) {
  1299  			continue
  1300  		}
  1302  		if svc.restoredFromDatapath {
  1303  			log.WithFields(logrus.Fields{
  1304  				logfields.ServiceID: svc.frontend.ID,
  1305  				logfields.L3n4Addr:  logfields.Repr(svc.frontend.L3n4Addr),
  1306  			}).
  1307  				Warn("Deleting no longer present service")
  1309  			if err := s.deleteServiceLocked(svc); err != nil {
  1310  				return stale, fmt.Errorf("Unable to remove service %+v: %w", svc, err)
  1311  			}
  1312  		} else if svc.restoredBackendHashes.Len() > 0 {
  1313  			// The service is still associated with stale backends
  1314  			stale = append(stale, svcID)
  1315  			log.WithFields(logrus.Fields{
  1316  				logfields.ServiceID:      svc.frontend.ID,
  1317  				logfields.ServiceName:    svc.svcName.String(),
  1318  				logfields.L3n4Addr:       logfields.Repr(svc.frontend.L3n4Addr),
  1319  				logfields.OrphanBackends: svc.restoredBackendHashes.Len(),
  1320  			}).Info("Service has stale backends: triggering refresh")
  1321  		}
  1323  		svc.restoredBackendHashes = nil
  1324  	}
  1326  	if localOnly {
  1327  		// Wait for full clustermesh synchronization before finalizing the
  1328  		// removal of orphan backends and affinity matches.
  1329  		return stale, nil
  1330  	}
  1332  	// Remove no longer existing affinity matches
  1333  	if option.Config.EnableSessionAffinity {
  1334  		if err := s.deleteOrphanAffinityMatchesLocked(); err != nil {
  1335  			return stale, err
  1336  		}
  1337  	}
  1339  	// Remove obsolete backends and release their IDs
  1340  	if err := s.deleteOrphanBackends(); err != nil {
  1341  		log.WithError(err).Warn("Failed to remove orphan backends")
  1342  	}
  1344  	return stale, nil
  1345  }
  1347  func (s *Service) createSVCInfoIfNotExist(p *lb.SVC) (*svcInfo, bool, bool,
  1348  	[]*cidr.CIDR, error,
  1349  ) {
  1350  	prevSessionAffinity := false
  1351  	prevLoadBalancerSourceRanges := []*cidr.CIDR{}
  1353  	hash := p.Frontend.Hash()
  1354  	svc, found := s.svcByHash[hash]
  1355  	if !found {
  1356  		// Allocate service ID for the new service
  1357  		addrID, err := AcquireID(p.Frontend.L3n4Addr, uint32(p.Frontend.ID))
  1358  		if err != nil {
  1359  			return nil, false, false, nil,
  1360  				fmt.Errorf("Unable to allocate service ID %d for %v: %w",
  1361  					p.Frontend.ID, p.Frontend, err)
  1362  		}
  1363  		p.Frontend.ID = addrID.ID
  1365  		svc = &svcInfo{
  1366  			hash:          hash,
  1367  			frontend:      p.Frontend,
  1368  			backendByHash: map[string]*lb.Backend{},
  1370  			svcType: p.Type,
  1371  			svcName: p.Name,
  1373  			sessionAffinity:           p.SessionAffinity,
  1374  			sessionAffinityTimeoutSec: p.SessionAffinityTimeoutSec,
  1376  			svcExtTrafficPolicy:      p.ExtTrafficPolicy,
  1377  			svcIntTrafficPolicy:      p.IntTrafficPolicy,
  1378  			svcNatPolicy:             p.NatPolicy,
  1379  			svcHealthCheckNodePort:   p.HealthCheckNodePort,
  1380  			loadBalancerSourceRanges: p.LoadBalancerSourceRanges,
  1381  			l7LBProxyPort:            p.L7LBProxyPort,
  1382  			LoopbackHostport:         p.LoopbackHostport,
  1383  		}
  1384  		s.svcByID[p.Frontend.ID] = svc
  1385  		s.svcByHash[hash] = svc
  1386  	} else {
  1387  		// Local Redirect Policies with service matcher would have same frontend
  1388  		// as the service clusterIP type. In such cases, if a Local redirect service
  1389  		// exists, we shouldn't override it with clusterIP type (e.g., k8s event/sync, etc).
  1390  		if svc.svcType == lb.SVCTypeLocalRedirect && p.Type == lb.SVCTypeClusterIP {
  1391  			err := NewErrLocalRedirectServiceExists(p.Frontend, p.Name)
  1392  			return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, err
  1393  		}
  1394  		// Local-redirect service can only override clusterIP service type or itself.
  1395  		if p.Type == lb.SVCTypeLocalRedirect &&
  1396  			(svc.svcType != lb.SVCTypeClusterIP && svc.svcType != lb.SVCTypeLocalRedirect) {
  1397  			err := fmt.Errorf("skip local-redirect service for "+
  1398  				"frontend %v as it overlaps with svc %v of type %v",
  1399  				p.Frontend, svc.svcName, svc.svcType)
  1400  			return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, err
  1401  		}
  1402  		prevSessionAffinity = svc.sessionAffinity
  1403  		prevLoadBalancerSourceRanges = svc.loadBalancerSourceRanges
  1404  		svc.svcType = p.Type
  1405  		svc.svcExtTrafficPolicy = p.ExtTrafficPolicy
  1406  		svc.svcIntTrafficPolicy = p.IntTrafficPolicy
  1407  		svc.svcNatPolicy = p.NatPolicy
  1408  		svc.svcHealthCheckNodePort = p.HealthCheckNodePort
  1409  		svc.sessionAffinity = p.SessionAffinity
  1410  		svc.sessionAffinityTimeoutSec = p.SessionAffinityTimeoutSec
  1411  		svc.loadBalancerSourceRanges = p.LoadBalancerSourceRanges
  1412  		// Name, namespace and cluster are optional and intended for exposure via
  1413  		// API. They they are not part of any BPF maps and cannot be restored
  1414  		// from datapath.
  1415  		if p.Name.Name != "" {
  1416  			svc.svcName.Name = p.Name.Name
  1417  		}
  1418  		if p.Name.Namespace != "" {
  1419  			svc.svcName.Namespace = p.Name.Namespace
  1420  		}
  1421  		if p.Name.Cluster != "" {
  1422  			svc.svcName.Cluster = p.Name.Cluster
  1423  		}
  1424  		// We have heard about the service from k8s, so unset the flag so that
  1425  		// SyncWithK8sFinished() won't consider the service obsolete, and thus
  1426  		// won't remove it.
  1427  		svc.restoredFromDatapath = false
  1429  		// Update L7 load balancer proxy port
  1430  		svc.l7LBProxyPort = p.L7LBProxyPort
  1431  	}
  1433  	return svc, !found, prevSessionAffinity, prevLoadBalancerSourceRanges, nil
  1434  }
  1436  func (s *Service) deleteBackendsFromAffinityMatchMap(svcID lb.ID, backendIDs []lb.BackendID) {
  1437  	log.WithFields(logrus.Fields{
  1438  		logfields.Backends:  backendIDs,
  1439  		logfields.ServiceID: svcID,
  1440  	}).Debug("Deleting backends from session affinity match")
  1442  	for _, bID := range backendIDs {
  1443  		if err := s.lbmap.DeleteAffinityMatch(uint16(svcID), bID); err != nil {
  1444  			log.WithFields(logrus.Fields{
  1445  				logfields.BackendID: bID,
  1446  				logfields.ServiceID: svcID,
  1447  			}).WithError(err).Warn("Unable to remove entry from affinity match map")
  1448  		}
  1449  	}
  1450  }
  1452  func (s *Service) addBackendsToAffinityMatchMap(svcID lb.ID, backendIDs []lb.BackendID) {
  1453  	log.WithFields(logrus.Fields{
  1454  		logfields.Backends:  backendIDs,
  1455  		logfields.ServiceID: svcID,
  1456  	}).Debug("Adding backends to affinity match map")
  1458  	for _, bID := range backendIDs {
  1459  		if err := s.lbmap.AddAffinityMatch(uint16(svcID), bID); err != nil {
  1460  			log.WithFields(logrus.Fields{
  1461  				logfields.BackendID: bID,
  1462  				logfields.ServiceID: svcID,
  1463  			}).WithError(err).Warn("Unable to add entry to affinity match map")
  1464  		}
  1465  	}
  1466  }
  1468  func (s *Service) upsertServiceIntoLBMaps(svc *svcInfo, isExtLocal, isIntLocal bool,
  1469  	prevBackendCount int, newBackends []*lb.Backend, obsoleteBackends []*lb.Backend,
  1470  	prevSessionAffinity bool, prevLoadBalancerSourceRanges []*cidr.CIDR,
  1471  	obsoleteSVCBackendIDs []lb.BackendID, getScopedLog func() *logrus.Entry,
  1472  	debugLogsEnabled bool,
  1473  ) error {
  1474  	v6FE := svc.frontend.IsIPv6()
  1476  	var (
  1477  		toDeleteAffinity, toAddAffinity []lb.BackendID
  1478  		checkLBSrcRange                 bool
  1479  	)
  1481  	// Update sessionAffinity
  1482  	//
  1483  	// If L7 LB is configured for this service then BPF level session affinity is not used so
  1484  	// that the L7 proxy port may be passed in a shared union in the service entry.
  1485  	if option.Config.EnableSessionAffinity && !svc.isL7LBService() {
  1486  		if prevSessionAffinity && !svc.sessionAffinity {
  1487  			// Remove backends from the affinity match because the svc's sessionAffinity
  1488  			// has been disabled
  1489  			toDeleteAffinity = make([]lb.BackendID, 0, len(obsoleteSVCBackendIDs)+len(svc.backends))
  1490  			toDeleteAffinity = append(toDeleteAffinity, obsoleteSVCBackendIDs...)
  1491  			for _, b := range svc.backends {
  1492  				toDeleteAffinity = append(toDeleteAffinity, b.ID)
  1493  			}
  1494  		} else if svc.sessionAffinity {
  1495  			toAddAffinity = make([]lb.BackendID, 0, len(svc.backends))
  1496  			for _, b := range svc.backends {
  1497  				toAddAffinity = append(toAddAffinity, b.ID)
  1498  			}
  1499  			if prevSessionAffinity {
  1500  				// Remove obsolete svc backends if previously the svc had the affinity enabled
  1501  				toDeleteAffinity = make([]lb.BackendID, 0, len(obsoleteSVCBackendIDs))
  1502  				toDeleteAffinity = append(toDeleteAffinity, obsoleteSVCBackendIDs...)
  1503  			}
  1504  		}
  1506  		s.deleteBackendsFromAffinityMatchMap(svc.frontend.ID, toDeleteAffinity)
  1507  		// New affinity matches (toAddAffinity) will be added after the new
  1508  		// backends have been added.
  1509  	}
  1511  	// Update LB source range check cidrs
  1512  	if checkLBSrcRange = svc.checkLBSourceRange() || len(prevLoadBalancerSourceRanges) != 0; checkLBSrcRange {
  1513  		if err := s.lbmap.UpdateSourceRanges(uint16(svc.frontend.ID),
  1514  			prevLoadBalancerSourceRanges, svc.loadBalancerSourceRanges,
  1515  			v6FE); err != nil {
  1516  			return err
  1517  		}
  1518  	}
  1520  	// Add new backends into BPF maps
  1521  	for _, b := range newBackends {
  1522  		if debugLogsEnabled {
  1523  			getScopedLog().WithFields(logrus.Fields{
  1524  				logfields.BackendID:     b.ID,
  1525  				logfields.BackendWeight: b.Weight,
  1526  				logfields.L3n4Addr:      b.L3n4Addr,
  1527  			}).Debug("Adding new backend")
  1528  		}
  1530  		if err := s.lbmap.AddBackend(b, b.L3n4Addr.IsIPv6()); err != nil {
  1531  			return err
  1532  		}
  1533  	}
  1535  	// Upsert service entries into BPF maps
  1536  	preferredBackends, activeBackends, nonActiveBackends := segregateBackends(svc.backends)
  1538  	natPolicy := lb.SVCNatPolicyNone
  1539  	natPolicySet := false
  1540  	for _, b := range svc.backends {
  1541  		// All backends have been previously checked to be either v4 or v6.
  1542  		if !natPolicySet {
  1543  			natPolicySet = true
  1544  			v6BE := b.L3n4Addr.IsIPv6()
  1545  			if v6FE && !v6BE {
  1546  				natPolicy = lb.SVCNatPolicyNat64
  1547  			} else if !v6FE && v6BE {
  1548  				natPolicy = lb.SVCNatPolicyNat46
  1549  			}
  1550  		}
  1551  	}
  1552  	if natPolicy == lb.SVCNatPolicyNat64 {
  1553  		// Backends have been added to the v4 backend map, but we now also need
  1554  		// to add them to the v6 backend map as v4-in-v6 address. The reason is
  1555  		// that backends could be used by multiple services, so a v4->v4 service
  1556  		// expects them in the v4 map, but v6->v4 service enters the v6 datapath
  1557  		// and looks them up in the v6 backend map (v4-in-v6), and only later on
  1558  		// after DNAT transforms the packet into a v4 one.
  1559  		for _, b := range newBackends {
  1560  			if err := s.lbmap.AddBackend(b, true); err != nil {
  1561  				return err
  1562  			}
  1563  		}
  1564  	}
  1565  	svc.svcNatPolicy = natPolicy
  1567  	p := &datapathTypes.UpsertServiceParams{
  1568  		ID:                        uint16(svc.frontend.ID),
  1569  		IP:                        svc.frontend.L3n4Addr.AddrCluster.AsNetIP(),
  1570  		Port:                      svc.frontend.L3n4Addr.L4Addr.Port,
  1571  		PreferredBackends:         preferredBackends,
  1572  		ActiveBackends:            activeBackends,
  1573  		NonActiveBackends:         nonActiveBackends,
  1574  		PrevBackendsCount:         prevBackendCount,
  1575  		IPv6:                      v6FE,
  1576  		NatPolicy:                 natPolicy,
  1577  		Type:                      svc.svcType,
  1578  		ExtLocal:                  isExtLocal,
  1579  		IntLocal:                  isIntLocal,
  1580  		Scope:                     svc.frontend.L3n4Addr.Scope,
  1581  		SessionAffinity:           svc.sessionAffinity,
  1582  		SessionAffinityTimeoutSec: svc.sessionAffinityTimeoutSec,
  1583  		CheckSourceRange:          checkLBSrcRange,
  1584  		UseMaglev:                 svc.useMaglev(),
  1585  		L7LBProxyPort:             svc.l7LBProxyPort,
  1586  		Name:                      svc.svcName,
  1587  		LoopbackHostport:          svc.LoopbackHostport,
  1588  	}
  1589  	if err := s.lbmap.UpsertService(p); err != nil {
  1590  		return err
  1591  	}
  1593  	// If L7 LB is configured for this service then BPF level session affinity is not used.
  1594  	if option.Config.EnableSessionAffinity && !svc.isL7LBService() {
  1595  		s.addBackendsToAffinityMatchMap(svc.frontend.ID, toAddAffinity)
  1596  	}
  1598  	// Remove backends not used by any service from BPF maps
  1599  	for _, be := range obsoleteBackends {
  1600  		id := be.ID
  1601  		if debugLogsEnabled {
  1602  			getScopedLog().WithField(logfields.BackendID, id).
  1603  				Debug("Removing obsolete backend")
  1604  		}
  1605  		s.lbmap.DeleteBackendByID(id)
  1606  		s.TerminateUDPConnectionsToBackend(&be.L3n4Addr)
  1607  	}
  1609  	return nil
  1610  }
  1612  func (s *Service) restoreBackendsLocked(svcBackendsById map[lb.BackendID]struct{}) error {
  1613  	failed, restored, skipped := 0, 0, 0
  1614  	backends, err := s.lbmap.DumpBackendMaps()
  1615  	if err != nil {
  1616  		return fmt.Errorf("Unable to dump backend maps: %w", err)
  1617  	}
  1619  	debugLogsEnabled := logging.CanLogAt(log.Logger, logrus.DebugLevel)
  1621  	svcBackendsCount := len(svcBackendsById)
  1622  	for _, b := range backends {
  1623  		if debugLogsEnabled {
  1624  			log.WithFields(logrus.Fields{
  1625  				logfields.BackendID:        b.ID,
  1626  				logfields.L3n4Addr:         b.L3n4Addr.String(),
  1627  				logfields.BackendState:     b.State,
  1628  				logfields.BackendPreferred: b.Preferred,
  1629  			}).Debug("Restoring backend")
  1630  		}
  1632  		if _, ok := svcBackendsById[b.ID]; !ok && (svcBackendsCount != 0) {
  1633  			// If a backend by ID isn't referenced by any of the services, it's
  1634  			// likely a leaked backend. In case of duplicate leaked backends,
  1635  			// there would be multiple IDs allocated for the same backend resource
  1636  			// identified by its L3nL4Addr hash. The second check for service
  1637  			// backends count is added for unusual cases where there might've been
  1638  			// a problem with reading entries from the services map. In such cases,
  1639  			// the agent should not wipe out the backends map, as this can disrupt
  1640  			// existing connections. SyncWithK8sFinished will later sync the backends
  1641  			// map with the latest state.
  1642  			// Leaked backend scenarios:
  1643  			// 1) Backend entries leaked, no duplicates
  1644  			// 2) Backend entries leaked with duplicates:
  1645  			// 	a) backend with overlapping L3nL4Addr hash is associated with service(s)
  1646  			//     Sequence of events:
  1647  			//     Backends were leaked prior to agent restart, but there was at least
  1648  			//     one service that the backend by hash is associated with.
  1649  			//     s.backendByHash will have a non-zero reference count for the
  1650  			//     overlapping L3nL4Addr hash.
  1651  			// 	b) none of the backends are associated with services
  1652  			//     Sequence of events:
  1653  			// 	   All the services these backends were associated with were deleted
  1654  			//     prior to agent restart.
  1655  			//     s.backendByHash will not have an entry for the backends hash.
  1656  			// As none of the service entries have a reference to these backends
  1657  			// in the services map, the backends were likely not available for
  1658  			// load-balancing new traffic. While there is a slim chance that the
  1659  			// backends could have previously established active connections,
  1660  			// and these connections can get disrupted. However, the leaks likely
  1661  			// happened when service entries were deleted, so those connections
  1662  			// were also expected to be terminated.
  1663  			// Regardless, delete the duplicates as this can affect restoration of current
  1664  			// active backends, and may prevent new backends getting added as map
  1665  			// size is limited, which can lead to connectivity disruptions.
  1666  			id := b.ID
  1667  			DeleteBackendID(id)
  1668  			if err := s.lbmap.DeleteBackendByID(id); err != nil {
  1669  				// As the backends map is not expected to be updated during restore,
  1670  				// the deletion call shouldn't fail. But log the error, just
  1671  				// in case...
  1672  				log.Errorf("unable to delete leaked backend: %v", id)
  1673  			}
  1674  			if debugLogsEnabled {
  1675  				log.WithFields(logrus.Fields{
  1676  					logfields.BackendID:        b.ID,
  1677  					logfields.L3n4Addr:         b.L3n4Addr,
  1678  					logfields.BackendState:     b.State,
  1679  					logfields.BackendPreferred: b.Preferred,
  1680  				}).Debug("Leaked backend entry not restored")
  1681  			}
  1682  			skipped++
  1683  			continue
  1684  		}
  1685  		if err := RestoreBackendID(b.L3n4Addr, b.ID); err != nil {
  1686  			log.WithError(err).WithFields(logrus.Fields{
  1687  				logfields.BackendID:        b.ID,
  1688  				logfields.L3n4Addr:         b.L3n4Addr,
  1689  				logfields.BackendState:     b.State,
  1690  				logfields.BackendPreferred: b.Preferred,
  1691  			}).Warning("Unable to restore backend")
  1692  			failed++
  1693  			continue
  1694  		}
  1695  		restored++
  1696  		hash := b.L3n4Addr.Hash()
  1697  		s.backendByHash[hash] = b
  1698  	}
  1700  	log.WithFields(logrus.Fields{
  1701  		logfields.RestoredBackends: restored,
  1702  		logfields.FailedBackends:   failed,
  1703  		logfields.SkippedBackends:  skipped,
  1704  	}).Info("Restored backends from maps")
  1706  	return nil
  1707  }
  1709  func (s *Service) deleteOrphanBackends() error {
  1710  	orphanBackends := 0
  1712  	for hash, b := range s.backendByHash {
  1713  		if s.backendRefCount[hash] == 0 {
  1714  			log.WithField(logfields.BackendID, b.ID).
  1715  				Debug("Removing orphan backend")
  1716  			// The b.ID is unique across IPv4/6, hence attempt
  1717  			// to clean it from both maps, and ignore errors.
  1718  			DeleteBackendID(b.ID)
  1719  			s.lbmap.DeleteBackendByID(b.ID)
  1720  			delete(s.backendByHash, hash)
  1721  			orphanBackends++
  1722  		}
  1723  	}
  1724  	log.WithFields(logrus.Fields{
  1725  		logfields.OrphanBackends: orphanBackends,
  1726  	}).Info("Deleted orphan backends")
  1728  	return nil
  1729  }
  1731  func (s *Service) restoreServicesLocked(svcBackendsById map[lb.BackendID]struct{}) error {
  1732  	failed, restored := 0, 0
  1734  	svcs, errors := s.lbmap.DumpServiceMaps()
  1735  	for _, err := range errors {
  1736  		log.WithError(err).Warning("Error occurred while dumping service maps")
  1737  	}
  1739  	for _, svc := range svcs {
  1740  		scopedLog := log.WithFields(logrus.Fields{
  1741  			logfields.ServiceID: svc.Frontend.ID,
  1742  			logfields.ServiceIP: svc.Frontend.L3n4Addr.String(),
  1743  		})
  1744  		scopedLog.Debug("Restoring service")
  1746  		if _, err := RestoreID(svc.Frontend.L3n4Addr, uint32(svc.Frontend.ID)); err != nil {
  1747  			failed++
  1748  			scopedLog.WithError(err).Warning("Unable to restore service ID")
  1749  		}
  1751  		newSVC := &svcInfo{
  1752  			hash:                svc.Frontend.Hash(),
  1753  			frontend:            svc.Frontend,
  1754  			backends:            svc.Backends,
  1755  			backendByHash:       map[string]*lb.Backend{},
  1756  			svcType:             svc.Type,
  1757  			svcExtTrafficPolicy: svc.ExtTrafficPolicy,
  1758  			svcIntTrafficPolicy: svc.IntTrafficPolicy,
  1759  			svcNatPolicy:        svc.NatPolicy,
  1760  			LoopbackHostport:    svc.LoopbackHostport,
  1762  			sessionAffinity:           svc.SessionAffinity,
  1763  			sessionAffinityTimeoutSec: svc.SessionAffinityTimeoutSec,
  1765  			// Indicate that the svc was restored from the BPF maps, so that
  1766  			// SyncWithK8sFinished() could remove services which were restored
  1767  			// from the maps but not present in the k8sServiceCache (e.g. a svc
  1768  			// was deleted while cilium-agent was down).
  1769  			restoredFromDatapath: true,
  1770  		}
  1772  		for j, backend := range svc.Backends {
  1773  			// DumpServiceMaps() can return services with some empty (nil) backends.
  1774  			if backend == nil {
  1775  				continue
  1776  			}
  1778  			hash := backend.L3n4Addr.Hash()
  1779  			s.backendRefCount.Add(hash)
  1780  			newSVC.backendByHash[hash] = svc.Backends[j]
  1781  			svcBackendsById[backend.ID] = struct{}{}
  1782  		}
  1784  		if len(newSVC.backendByHash) > 0 {
  1785  			// Indicate that these backends were restored from BPF maps,
  1786  			// so that they are not removed until SyncWithK8sFinished()
  1787  			// is executed (if not observed in the meanwhile) to prevent
  1788  			// disrupting valid connections.
  1789  			newSVC.restoredBackendHashes = sets.KeySet(newSVC.backendByHash)
  1790  		}
  1792  		// Recalculate Maglev lookup tables if the maps were removed due to
  1793  		// the changed M param.
  1794  		ipv6 := newSVC.frontend.IsIPv6() || (svc.NatPolicy == lb.SVCNatPolicyNat46)
  1795  		recreated := s.lbmap.IsMaglevLookupTableRecreated(ipv6)
  1796  		if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly &&
  1797  			newSVC.useMaglev() && recreated {
  1799  			backends := make(map[string]*lb.Backend, len(newSVC.backends))
  1800  			for _, b := range newSVC.backends {
  1801  				// DumpServiceMaps() can return services with some empty (nil) backends.
  1802  				if b == nil {
  1803  					continue
  1804  				}
  1806  				backends[b.String()] = b
  1807  			}
  1808  			if err := s.lbmap.UpsertMaglevLookupTable(uint16(newSVC.frontend.ID), backends,
  1809  				ipv6); err != nil {
  1810  				scopedLog.WithError(err).Warning("Unable to upsert into the Maglev BPF map.")
  1811  				continue
  1812  			}
  1813  		}
  1815  		s.svcByHash[newSVC.hash] = newSVC
  1816  		s.svcByID[newSVC.frontend.ID] = newSVC
  1817  		restored++
  1818  	}
  1820  	log.WithFields(logrus.Fields{
  1821  		logfields.RestoredSVCs: restored,
  1822  		logfields.FailedSVCs:   failed,
  1823  	}).Info("Restored services from maps")
  1825  	return nil
  1826  }
  1828  func (s *Service) deleteServiceLocked(svc *svcInfo) error {
  1829  	ipv6 := svc.frontend.L3n4Addr.IsIPv6() || svc.svcNatPolicy == lb.SVCNatPolicyNat46
  1830  	obsoleteBackendIDs, obsoleteBackends := s.deleteBackendsFromCacheLocked(svc)
  1831  	scopedLog := log.WithFields(logrus.Fields{
  1832  		logfields.ServiceID: svc.frontend.ID,
  1833  		logfields.ServiceIP: svc.frontend.L3n4Addr,
  1834  		logfields.Backends:  svc.backends,
  1835  	})
  1836  	scopedLog.Debug("Deleting service")
  1838  	if err := s.lbmap.DeleteService(svc.frontend, len(svc.backends),
  1839  		svc.useMaglev(), svc.svcNatPolicy); err != nil {
  1840  		return err
  1841  	}
  1843  	// Delete affinity matches
  1844  	if option.Config.EnableSessionAffinity && svc.sessionAffinity {
  1845  		backendIDs := make([]lb.BackendID, 0, len(svc.backends))
  1846  		for _, b := range svc.backends {
  1847  			backendIDs = append(backendIDs, b.ID)
  1848  		}
  1849  		s.deleteBackendsFromAffinityMatchMap(svc.frontend.ID, backendIDs)
  1850  	}
  1852  	if option.Config.EnableSVCSourceRangeCheck &&
  1853  		svc.svcType == lb.SVCTypeLoadBalancer {
  1854  		if err := s.lbmap.UpdateSourceRanges(uint16(svc.frontend.ID),
  1855  			svc.loadBalancerSourceRanges, nil, ipv6); err != nil {
  1856  			return err
  1857  		}
  1858  	}
  1860  	delete(s.svcByHash, svc.hash)
  1861  	delete(s.svcByID, svc.frontend.ID)
  1863  	for _, id := range obsoleteBackendIDs {
  1864  		scopedLog.WithField(logfields.BackendID, id).
  1865  			Debug("Deleting obsolete backend")
  1866  		s.lbmap.DeleteBackendByID(id)
  1867  	}
  1868  	if err := DeleteID(uint32(svc.frontend.ID)); err != nil {
  1869  		return fmt.Errorf("Unable to release service ID %d: %w", svc.frontend.ID, err)
  1870  	}
  1872  	// Delete managed neighbor entries of the LB
  1873  	if option.Config.DatapathMode == datapathOpt.DatapathModeLBOnly {
  1874  		s.deleteBackendNeighbors(obsoleteBackends)
  1875  	}
  1877  	if svc.healthcheckFrontendHash != "" {
  1878  		healthSvc := s.svcByHash[svc.healthcheckFrontendHash]
  1879  		if healthSvc != nil {
  1880  			s.deleteServiceLocked(healthSvc)
  1881  		}
  1882  	}
  1884  	if option.Config.EnableHealthCheckNodePort {
  1885  		s.healthServer.DeleteService(lb.ID(svc.frontend.ID))
  1886  	}
  1888  	metrics.ServicesEventsCount.WithLabelValues("delete").Inc()
  1889  	s.notifyMonitorServiceDelete(svc.frontend.ID)
  1891  	return nil
  1892  }
// updateBackendsCacheLocked reconciles the backends cached for svc with the
// given backends list and updates the global backend bookkeeping
// (s.backendByHash, s.backendRefCount) accordingly.
//
// The elements of backends are modified in place: each one gets its backend ID
// assigned and, for backends already known to the service, its state may be
// overwritten with the cached state.
//
// It returns, in order:
//   - the backends for which a new backend ID was acquired,
//   - the backends whose global reference was dropped by this call and whose
//     ID was released,
//   - the IDs of all backends removed from this service (they may still be
//     used by other services),
//   - an error if acquiring a backend ID or updating a backend's state in the
//     lbmap failed; all slices are nil in that case.
//
// NOTE(review): on the error paths, changes already made for backends handled
// earlier in the loop are not rolled back.
func (s *Service) updateBackendsCacheLocked(svc *svcInfo, backends []*lb.Backend) (
	[]*lb.Backend, []*lb.Backend, []lb.BackendID, error,
) {
	obsoleteBackends := []*lb.Backend{}       // not used by any svc
	obsoleteSVCBackendIDs := []lb.BackendID{} // removed from the svc, but might be used by other svc
	newBackends := []*lb.Backend{}            // previously not used by any svc
	// Hashes of all backends passed in; used below to detect backends that
	// are cached for the service but no longer part of the new list.
	backendSet := map[string]struct{}{}
	for i, backend := range backends {
		hash := backend.L3n4Addr.Hash()
		backendSet[hash] = struct{}{}
		if b, found := svc.backendByHash[hash]; !found {
			// The backend is new to this service. Add presumably reports
			// true only for the first reference to the hash — in that case
			// a fresh backend ID is needed.
			if s.backendRefCount.Add(hash) {
				id, err := AcquireBackendID(backend.L3n4Addr)
				if err != nil {
					// Undo the reference taken just above.
					s.backendRefCount.Delete(hash)
					return nil, nil, nil, fmt.Errorf("Unable to acquire backend ID for %q: %w",
						backend.L3n4Addr, err)
				}
				backends[i].ID = id
				// NOTE(review): backend and backends[i] are the same
				// pointer, so this assignment has no effect.
				backends[i].Weight = backend.Weight
				newBackends = append(newBackends, backends[i])
				// The global cache keeps its own copy of the backend.
				s.backendByHash[hash] = backends[i].DeepCopy()
			} else {
				// Already referenced by another service: reuse its ID.
				backends[i].ID = s.backendByHash[hash].ID
			}
		} else {
			// We observed this backend, hence let's remove it from the list
			// of the restored ones.
			svc.restoredBackendHashes.Delete(hash)
			backends[i].ID = b.ID
			// Backend state can either be updated via kubernetes events,
			// or service API. If the state update is coming via kubernetes events,
			// then we need to update the internal state. Currently, the only state
			// update in this case is for the terminating state or when backend
			// weight has changed. All other state updates happen via the API
			// (UpdateBackendsState) in which case we need to set the backend state
			// to the saved state.
			switch {
			case backends[i].State == lb.BackendStateTerminating &&
				b.State != lb.BackendStateTerminating:
				b.State = backends[i].State
				// Update the persisted backend state in BPF maps.
				if err := s.lbmap.UpdateBackendWithState(backends[i]); err != nil {
					return nil, nil, nil, fmt.Errorf("failed to update backend %+v: %w",
						backends[i], err)
				}
			case backends[i].Weight != b.Weight:
				// Update the cached weight as weight has changed
				b.Weight = backends[i].Weight
				// Update but do not persist the state as backend might be set as active
				// only temporarily for specific service
				b.State = backends[i].State
			default:
				// Set the backend state to the saved state.
				backends[i].State = b.State
			}
		}
		// The per-service cache always points at the object passed in.
		svc.backendByHash[hash] = backends[i]
	}
	// Second pass: handle backends cached for the service that are missing
	// from the new list.
	for hash, backend := range svc.backendByHash {
		if _, found := backendSet[hash]; !found {
			if svc.restoredBackendHashes.Has(hash) {
				// Don't treat backends restored from the datapath and not yet observed as
				// obsolete, because that would cause connections targeting those backends
				// to be dropped in case we haven't fully synchronized yet.
				backends = append(backends, backend)
				continue
			}
			obsoleteSVCBackendIDs = append(obsoleteSVCBackendIDs, backend.ID)
			// Delete presumably reports true when the last reference to
			// the hash is dropped; only then is the backend released
			// globally.
			if s.backendRefCount.Delete(hash) {
				DeleteBackendID(backend.ID)
				delete(s.backendByHash, hash)
				obsoleteBackends = append(obsoleteBackends, backend)
			}
			delete(svc.backendByHash, hash)
		}
	}
	// The stored list holds the passed-in backends plus any restored backends
	// kept alive above.
	svc.backends = backends
	return newBackends, obsoleteBackends, obsoleteSVCBackendIDs, nil
}
  1981  func (s *Service) deleteBackendsFromCacheLocked(svc *svcInfo) ([]lb.BackendID, []*lb.Backend) {
  1982  	obsoleteBackendIDs := []lb.BackendID{}
  1983  	obsoleteBackends := []*lb.Backend{}
  1985  	for hash, backend := range svc.backendByHash {
  1986  		if s.backendRefCount.Delete(hash) {
  1987  			DeleteBackendID(backend.ID)
  1988  			obsoleteBackendIDs = append(obsoleteBackendIDs, backend.ID)
  1989  			obsoleteBackends = append(obsoleteBackends, backend.DeepCopy())
  1990  		}
  1991  	}
  1993  	return obsoleteBackendIDs, obsoleteBackends
  1994  }
  1996  func (s *Service) notifyMonitorServiceUpsert(frontend lb.L3n4AddrID, backends []*lb.Backend,
  1997  	svcType lb.SVCType, svcExtTrafficPolicy, svcIntTrafficPolicy lb.SVCTrafficPolicy, svcName, svcNamespace string,
  1998  ) {
  1999  	id := uint32(frontend.ID)
  2000  	fe := monitorAPI.ServiceUpsertNotificationAddr{
  2001  		IP:   frontend.AddrCluster.AsNetIP(),
  2002  		Port: frontend.Port,
  2003  	}
  2005  	be := make([]monitorAPI.ServiceUpsertNotificationAddr, 0, len(backends))
  2006  	for _, backend := range backends {
  2007  		b := monitorAPI.ServiceUpsertNotificationAddr{
  2008  			IP:   backend.AddrCluster.AsNetIP(),
  2009  			Port: backend.Port,
  2010  		}
  2011  		be = append(be, b)
  2012  	}
  2014  	msg := monitorAPI.ServiceUpsertMessage(id, fe, be, string(svcType), string(svcExtTrafficPolicy), string(svcIntTrafficPolicy), svcName, svcNamespace)
  2015  	s.monitorAgent.SendEvent(monitorAPI.MessageTypeAgent, msg)
  2016  }
  2018  func (s *Service) notifyMonitorServiceDelete(id lb.ID) {
  2019  	s.monitorAgent.SendEvent(monitorAPI.MessageTypeAgent, monitorAPI.ServiceDeleteMessage(uint32(id)))
  2020  }
  2022  // GetServiceNameByAddr returns namespace and name of the service with a given L3n4Addr. The third
  2023  // return value is set to true if and only if the service is found in the map.
  2024  func (s *Service) GetServiceNameByAddr(addr lb.L3n4Addr) (string, string, bool) {
  2025  	s.RLock()
  2026  	defer s.RUnlock()
  2028  	svc, found := s.svcByHash[addr.Hash()]
  2029  	if !found {
  2030  		return "", "", false
  2031  	}
  2033  	return svc.svcName.Namespace, svc.svcName.Name, true
  2034  }
  2036  // isWildcardAddr returns true if given frontend is used for wildcard svc lookups
  2037  // (by bpf_sock).
  2038  func isWildcardAddr(frontend lb.L3n4AddrID) bool {
  2039  	if frontend.IsIPv6() {
  2040  		return cmtypes.MustParseAddrCluster("::").Equal(frontend.AddrCluster)
  2041  	}
  2042  	return cmtypes.MustParseAddrCluster("").Equal(frontend.AddrCluster)
  2043  }
  2045  // segregateBackends returns the list of active, preferred and nonActive backends to be
  2046  // added to the lbmaps. If EnableK8sTerminatingEndpoint and there are no active backends,
  2047  // segregateBackends will return all terminating backends as active.
  2048  func segregateBackends(backends []*lb.Backend) (preferredBackends map[string]*lb.Backend,
  2049  	activeBackends map[string]*lb.Backend, nonActiveBackends []lb.BackendID,
  2050  ) {
  2051  	preferredBackends = make(map[string]*lb.Backend)
  2052  	activeBackends = make(map[string]*lb.Backend, len(backends))
  2054  	for _, b := range backends {
  2055  		// Separate active from non-active backends so that they won't be selected
  2056  		// to serve new requests, but can be restored after agent restart. Non-active backends
  2057  		// are kept in the affinity and backend maps so that existing connections
  2058  		// are able to terminate gracefully. Such backends would either be cleaned-up
  2059  		// when the backends are deleted, or they could transition to active state.
  2060  		if b.State == lb.BackendStateActive {
  2061  			activeBackends[b.String()] = b
  2062  			// keep another list of preferred backends if available
  2063  			if b.Preferred {
  2064  				preferredBackends[b.String()] = b
  2065  			}
  2066  		} else {
  2067  			nonActiveBackends = append(nonActiveBackends, b.ID)
  2068  		}
  2069  	}
  2070  	// To avoid connections drops during rolling updates, Kubernetes defines a Terminating state on the EndpointSlices
  2071  	// that can be used to identify Pods that, despite being terminated, still can serve traffic.
  2072  	// In case that there are no Active backends, use the Backends in TerminatingState to answer new requests
  2073  	// and avoid traffic disruption until new active backends are created.
  2074  	//
  2075  	if option.Config.EnableK8sTerminatingEndpoint && len(activeBackends) == 0 {
  2076  		nonActiveBackends = []lb.BackendID{}
  2077  		for _, b := range backends {
  2078  			if b.State == lb.BackendStateTerminating {
  2079  				activeBackends[b.String()] = b
  2080  			} else {
  2081  				nonActiveBackends = append(nonActiveBackends, b.ID)
  2082  			}
  2083  		}
  2084  	}
  2085  	return preferredBackends, activeBackends, nonActiveBackends
  2086  }
  2088  // SyncNodePortFrontends updates all NodePort services with a new set of frontend
  2089  // IP addresses.
  2090  func (s *Service) SyncNodePortFrontends(addrs sets.Set[netip.Addr]) error {
  2091  	s.Lock()
  2092  	defer s.Unlock()
  2094  	existingFEs := sets.New[netip.Addr]()
  2095  	removedFEs := make([]*svcInfo, 0)
  2097  	// Find all NodePort services by finding the surrogate services, and find
  2098  	// services with a removed frontend.
  2099  	v4Svcs := make([]*svcInfo, 0)
  2100  	v6Svcs := make([]*svcInfo, 0)
  2101  	for _, svc := range s.svcByID {
  2102  		if svc.svcType != lb.SVCTypeNodePort {
  2103  			continue
  2104  		}
  2106  		switch svc.frontend.AddrCluster.Addr() {
  2107  		case netip.IPv4Unspecified():
  2108  			v4Svcs = append(v4Svcs, svc)
  2109  		case netip.IPv6Unspecified():
  2110  			v6Svcs = append(v6Svcs, svc)
  2111  		default:
  2112  			addr := svc.frontend.AddrCluster.Addr()
  2113  			existingFEs.Insert(addr)
  2114  			if _, ok := addrs[addr]; !ok {
  2115  				removedFEs = append(removedFEs, svc)
  2116  			}
  2117  		}
  2118  	}
  2120  	// Delete the services of the removed frontends
  2121  	for _, svc := range removedFEs {
  2122  		log := log.WithField(logfields.K8sNamespace, svc.svcName.Namespace).
  2123  			WithField(logfields.K8sSvcName, svc.svcName.Name).
  2124  			WithField(logfields.L3n4Addr, svc.frontend.L3n4Addr)
  2126  		if err := s.deleteServiceLocked(svc); err != nil {
  2127  			return fmt.Errorf("delete service: %w", err)
  2128  		} else {
  2129  			log.Debug("Deleted nodeport service of a removed frontend")
  2130  		}
  2131  	}
  2133  	// Create services for the new frontends
  2134  	for addr := range addrs {
  2135  		if !existingFEs.Has(addr) {
  2136  			// No services for this frontend, create them.
  2137  			svcs := v4Svcs
  2138  			if addr.Is6() {
  2139  				svcs = v6Svcs
  2140  			}
  2141  			for _, svcInfo := range svcs {
  2142  				fe := lb.NewL3n4AddrID(
  2143  					svcInfo.frontend.Protocol,
  2144  					cmtypes.AddrClusterFrom(addr, svcInfo.frontend.AddrCluster.ClusterID()),
  2145  					svcInfo.frontend.Port,
  2146  					svcInfo.frontend.Scope,
  2147  					0,
  2148  				)
  2149  				svc := svcInfo.deepCopyToLBSVC()
  2150  				svc.Frontend = *fe
  2152  				log := log.WithField(logfields.K8sNamespace, svc.Name.Namespace).
  2153  					WithField(logfields.K8sSvcName, svc.Name.Name).
  2154  					WithField(logfields.L3n4Addr, svc.Frontend.L3n4Addr)
  2155  				_, _, err := s.upsertService(svc)
  2156  				if err != nil {
  2157  					return fmt.Errorf("upsert service: %w", err)
  2158  				} else {
  2159  					log.Debug("Created nodeport service for new frontend")
  2160  				}
  2161  			}
  2162  		}
  2163  	}
  2164  	return nil
  2165  }
  2167  func backendToNode(b *lb.Backend) *nodeTypes.Node {
  2168  	return &nodeTypes.Node{
  2169  		Name: fmt.Sprintf("backend-%s", b.L3n4Addr.AddrCluster.AsNetIP()),
  2170  		IPAddresses: []nodeTypes.Address{{
  2171  			Type: addressing.NodeInternalIP,
  2172  			IP:   b.L3n4Addr.AddrCluster.AsNetIP(),
  2173  		}},
  2174  	}
  2175  }
  2177  func (s *Service) upsertBackendNeighbors(newBackends, oldBackends []*lb.Backend) {
  2178  	for _, b := range newBackends {
  2179  		s.backendDiscovery.InsertMiscNeighbor(backendToNode(b))
  2180  	}
  2181  	s.deleteBackendNeighbors(oldBackends)
  2182  }
  2184  func (s *Service) deleteBackendNeighbors(obsoleteBackends []*lb.Backend) {
  2185  	for _, b := range obsoleteBackends {
  2186  		s.backendDiscovery.DeleteMiscNeighbor(backendToNode(b))
  2187  	}
  2188  }