github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/manager.go

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package endpointmanager
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"maps"
    11  	"net/netip"
    12  	"sync"
    13  
    14  	"github.com/cilium/hive/cell"
    15  	"github.com/sirupsen/logrus"
    16  
    17  	"github.com/cilium/cilium/pkg/completion"
    18  	"github.com/cilium/cilium/pkg/controller"
    19  	"github.com/cilium/cilium/pkg/endpoint"
    20  	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
    21  	"github.com/cilium/cilium/pkg/endpoint/regeneration"
    22  	"github.com/cilium/cilium/pkg/identity/cache"
    23  	"github.com/cilium/cilium/pkg/ipcache"
    24  	"github.com/cilium/cilium/pkg/lock"
    25  	"github.com/cilium/cilium/pkg/logging"
    26  	"github.com/cilium/cilium/pkg/logging/logfields"
    27  	"github.com/cilium/cilium/pkg/mcastmanager"
    28  	"github.com/cilium/cilium/pkg/metrics"
    29  	"github.com/cilium/cilium/pkg/metrics/metric"
    30  	"github.com/cilium/cilium/pkg/node"
    31  	"github.com/cilium/cilium/pkg/option"
    32  	"github.com/cilium/cilium/pkg/policy"
    33  	"github.com/cilium/cilium/pkg/time"
    34  )
    35  
    36  var (
    37  	log         = logging.DefaultLogger.WithField(logfields.LogSubsys, "endpoint-manager")
    38  	metricsOnce sync.Once
    39  	launchTime  = 30 * time.Second
    40  
    41  	endpointGCControllerGroup = controller.NewGroup("endpoint-gc")
    42  )
    43  
    44  // endpointManager is a structure that holds state about the collection of
    45  // locally running endpoints.
    46  type endpointManager struct {
    47  	health cell.Health
    48  
    49  	// mutex protects endpoints and endpointsAux
    50  	mutex lock.RWMutex
    51  
    52  	// endpoints is the global list of endpoints indexed by ID. mutex must
    53  	// be held to read and write.
    54  	endpoints    map[uint16]*endpoint.Endpoint
    55  	endpointsAux map[string]*endpoint.Endpoint
    56  
    57  	// mcastManager handles IPv6 multicast group join/leave for pods. This is required for the
    58  	// node to receive ICMPv6 NDP messages, in particular NS (Neighbor Solicitation) messages, so
    59  	// that pod IPv6 addresses are discoverable.
    60  	mcastManager *mcastmanager.MCastManager
    61  
    62  	// EndpointSynchronizer updates external resources (e.g., Kubernetes) with
    63  	// up-to-date information about endpoints managed by the endpoint manager.
    64  	EndpointResourceSynchronizer
    65  
    66  	// subscribers are notified when events occur in the endpointManager.
    67  	subscribers map[Subscriber]struct{}
    68  
    69  	// checkHealth supports endpoint garbage collection by verifying the health
    70  	// of an endpoint.
    71  	checkHealth EndpointCheckerFunc
    72  
    73  	// deleteEndpoint is the function used to remove the endpoint from the
    74  	// endpointManager and clean it up. Always set to removeEndpoint; abstracted as endpointDeleteFunc for testing purposes.
    75  	deleteEndpoint endpointDeleteFunc
    76  
    77  	// A mark-and-sweep garbage collector may operate on the endpoint list.
    78  	// This is configured via WithPeriodicEndpointGC() and will mark
    79  	// endpoints for removal on one run of the controller, then in the
    80  	// subsequent controller run will remove the endpoints.
    81  	markedEndpoints []uint16
    82  
    83  	// controllers associated with the endpoint manager.
    84  	controllers *controller.Manager
    85  
    86  	policyMapPressure *policyMapPressure
    87  
    88  	// localNodeStore allows retrieving information about, and observing changes
    89  	// to, the local node.
    90  	localNodeStore *node.LocalNodeStore
    91  
    92  	// Allocator for local endpoint identifiers.
    93  	epIDAllocator *epIDAllocator
    94  }
    95  
    96  // endpointDeleteFunc is used to abstract away concrete Endpoint Delete
    97  // functionality from endpoint management for testing purposes.
    98  type endpointDeleteFunc func(*endpoint.Endpoint, endpoint.DeleteConfig) []error
    99  
   100  // New creates a new endpointManager.
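        //
        // A minimal wiring sketch (illustrative only; in the agent this is done by the
        // endpointmanager Hive cell, and epSyncer / checkEndpointHealth stand for any
        // EndpointResourceSynchronizer and EndpointCheckerFunc implementations):
        //
        //	mgr := New(epSyncer, localNodeStore, health)
        //	mgr = mgr.WithPeriodicEndpointGC(ctx, checkEndpointHealth, 5*time.Minute)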
   101  func New(epSynchronizer EndpointResourceSynchronizer, lns *node.LocalNodeStore, health cell.Health) *endpointManager {
   102  	mgr := endpointManager{
   103  		health:                       health,
   104  		endpoints:                    make(map[uint16]*endpoint.Endpoint),
   105  		endpointsAux:                 make(map[string]*endpoint.Endpoint),
   106  		mcastManager:                 mcastmanager.New(option.Config.IPv6MCastDevice),
   107  		EndpointResourceSynchronizer: epSynchronizer,
   108  		subscribers:                  make(map[Subscriber]struct{}),
   109  		controllers:                  controller.NewManager(),
   110  		localNodeStore:               lns,
   111  		epIDAllocator:                newEPIDAllocator(),
   112  	}
   113  	mgr.deleteEndpoint = mgr.removeEndpoint
   114  	mgr.policyMapPressure = newPolicyMapPressure()
   115  	return &mgr
   116  }
   117  
   118  // WithPeriodicEndpointGC runs a controller to periodically garbage collect
   119  // endpoints that match the specified EndpointCheckerFunc.
   120  func (mgr *endpointManager) WithPeriodicEndpointGC(ctx context.Context, checkHealth EndpointCheckerFunc, interval time.Duration) *endpointManager {
   121  	mgr.checkHealth = checkHealth
   122  	mgr.controllers.UpdateController("endpoint-gc",
   123  		controller.ControllerParams{
   124  			Group:       endpointGCControllerGroup,
   125  			DoFunc:      mgr.markAndSweep,
   126  			RunInterval: interval,
   127  			Context:     ctx,
   128  			Health:      mgr.health.NewScope("endpoint-gc"),
   129  		})
   130  	return mgr
   131  }
   132  
   133  // waitForProxyCompletions blocks until all proxy changes have been completed.
   134  func waitForProxyCompletions(proxyWaitGroup *completion.WaitGroup) error {
   135  	err := proxyWaitGroup.Context().Err()
   136  	if err != nil {
   137  		return fmt.Errorf("context cancelled before waiting for proxy updates: %w", err)
   138  	}
   139  
   140  	start := time.Now()
   141  	log.Debug("Waiting for proxy updates to complete...")
   142  	err = proxyWaitGroup.Wait()
   143  	if err != nil {
   144  		return fmt.Errorf("proxy updates failed: %w", err)
   145  	}
   146  	log.Debug("Wait time for proxy updates: ", time.Since(start))
   147  
   148  	return nil
   149  }
   150  
   151  // UpdatePolicyMaps returns a WaitGroup which is signaled once all endpoints
   152  // have had their PolicyMaps updated against the Endpoint's desired policy state.
   153  //
   154  // Endpoints will wait on the 'notifyWg' parameter before updating policy maps.
   155  func (mgr *endpointManager) UpdatePolicyMaps(ctx context.Context, notifyWg *sync.WaitGroup) *sync.WaitGroup {
   156  	var epWG sync.WaitGroup
   157  	var wg sync.WaitGroup
   158  
   159  	proxyWaitGroup := completion.NewWaitGroup(ctx)
   160  
   161  	eps := mgr.GetEndpoints()
   162  	epWG.Add(len(eps))
   163  	wg.Add(1)
   164  
   165  	// This is in a goroutine to allow the caller to proceed with other tasks before waiting for the ACKs to complete
   166  	go func() {
   167  		// Wait for all the eps to have applied policy map
   168  		// changes before waiting for the changes to be ACKed
   169  		epWG.Wait()
   170  		if err := waitForProxyCompletions(proxyWaitGroup); err != nil {
   171  			log.WithError(err).Warning("Failed to apply L7 proxy policy changes. These will be re-applied in future updates.")
   172  		}
   173  		wg.Done()
   174  	}()
   175  
   176  	// TODO: bound by number of CPUs?
   177  	for _, ep := range eps {
   178  		go func(ep *endpoint.Endpoint) {
   179  			// Proceed only after all notifications have been delivered to endpoints
   180  			notifyWg.Wait()
   181  			if err := ep.ApplyPolicyMapChanges(proxyWaitGroup); err != nil && !errors.Is(err, endpoint.ErrNotAlive) {
   182  				ep.Logger("endpointmanager").WithError(err).Warning("Failed to apply policy map changes. These will be re-applied in future updates.")
   183  			}
   184  			epWG.Done()
   185  		}(ep)
   186  	}
   187  
   188  	return &wg
   189  }
   190  
   191  // InitMetrics hooks the endpointManager into the metrics subsystem. This can
   192  // only be done once, globally, otherwise the metrics library will panic.
   193  func (mgr *endpointManager) InitMetrics(registry *metrics.Registry) {
   194  	if option.Config.DryMode {
   195  		return
   196  	}
   197  	metricsOnce.Do(func() {
   198  		// Endpoint is a function used to collect this metric. We cannot
   199  		// increment/decrement a gauge since we invoke Remove gratuitously and that
   200  		// would result in negative counts. It must be thread-safe.
   201  
   202  		metrics.Endpoint = metric.NewGaugeFunc(metric.GaugeOpts{
   203  			Namespace: metrics.Namespace,
   204  			Name:      "endpoint",
   205  			Help:      "Number of endpoints managed by this agent",
   206  		},
   207  			func() float64 { return float64(len(mgr.GetEndpoints())) },
   208  		)
   209  		registry.MustRegister(metrics.Endpoint)
   210  	})
   211  }
   212  
   213  // allocateID checks if the ID can be reused. If it cannot, returns an error.
   214  // If an ID of 0 is provided, a new ID is allocated. If a new ID cannot be
   215  // allocated, returns an error.
   216  func (mgr *endpointManager) allocateID(currID uint16) (uint16, error) {
   217  	var newID uint16
   218  	if currID != 0 {
   219  		if err := mgr.epIDAllocator.reuse(currID); err != nil {
   220  			return 0, fmt.Errorf("unable to reuse endpoint ID: %w", err)
   221  		}
   222  		newID = currID
   223  	} else {
   224  		id := mgr.epIDAllocator.allocate()
   225  		if id == uint16(0) {
   226  			return 0, fmt.Errorf("no more endpoint IDs available")
   227  		}
   228  		newID = id
   229  	}
   230  
   231  	return newID, nil
   232  }
   233  
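        // removeIDLocked removes the given endpoint ID from the endpoints map.
        // The caller must hold mgr.mutex for writing.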
   234  func (mgr *endpointManager) removeIDLocked(currID uint16) {
   235  	delete(mgr.endpoints, currID)
   236  }
   237  
   238  // RemoveID removes the id from the endpoints map in the endpointManager.
   239  func (mgr *endpointManager) RemoveID(currID uint16) {
   240  	mgr.mutex.Lock()
   241  	defer mgr.mutex.Unlock()
   242  	mgr.removeIDLocked(currID)
   243  }
   244  
   245  // Lookup looks up an endpoint by a prefixed ID string.
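        // For example, IDs of the form "cilium-local:<numeric-id>" or "ipv4:<address>"
        // are accepted; the full set of supported prefixes is defined in pkg/endpoint/id.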
   246  func (mgr *endpointManager) Lookup(id string) (*endpoint.Endpoint, error) {
   247  	mgr.mutex.RLock()
   248  	defer mgr.mutex.RUnlock()
   249  
   250  	prefix, eid, err := endpointid.Parse(id)
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  
   255  	switch prefix {
   256  	case endpointid.CiliumLocalIdPrefix:
   257  		n, err := endpointid.ParseCiliumID(id)
   258  		if err != nil {
   259  			return nil, err
   260  		}
   261  		if n > endpointid.MaxEndpointID {
   262  			return nil, fmt.Errorf("%d: endpoint ID too large", n)
   263  		}
   264  		return mgr.lookupCiliumID(uint16(n)), nil
   265  
   266  	case endpointid.CiliumGlobalIdPrefix:
   267  		return nil, ErrUnsupportedID
   268  
   269  	case endpointid.CNIAttachmentIdPrefix:
   270  		return mgr.lookupCNIAttachmentID(eid), nil
   271  
   272  	case endpointid.ContainerIdPrefix:
   273  		return mgr.lookupContainerID(eid), nil
   274  
   275  	case endpointid.DockerEndpointPrefix:
   276  		return mgr.lookupDockerEndpoint(eid), nil
   277  
   278  	case endpointid.ContainerNamePrefix:
   279  		return mgr.lookupDockerContainerName(eid), nil
   280  
   281  	case endpointid.PodNamePrefix:
   282  		return mgr.lookupPodNameLocked(eid), nil
   283  
   284  	case endpointid.CEPNamePrefix:
   285  		return mgr.lookupCEPNameLocked(eid), nil
   286  
   287  	case endpointid.IPv4Prefix:
   288  		return mgr.lookupIPv4(eid), nil
   289  
   290  	case endpointid.IPv6Prefix:
   291  		return mgr.lookupIPv6(eid), nil
   292  
   293  	default:
   294  		return nil, ErrInvalidPrefix{InvalidPrefix: prefix.String()}
   295  	}
   296  }
   297  
   298  // LookupCiliumID looks up endpoint by endpoint ID
   299  func (mgr *endpointManager) LookupCiliumID(id uint16) *endpoint.Endpoint {
   300  	mgr.mutex.RLock()
   301  	ep := mgr.lookupCiliumID(id)
   302  	mgr.mutex.RUnlock()
   303  	return ep
   304  }
   305  
   306  // LookupCNIAttachmentID looks up endpoint by CNI attachment ID
   307  func (mgr *endpointManager) LookupCNIAttachmentID(id string) *endpoint.Endpoint {
   308  	mgr.mutex.RLock()
   309  	ep := mgr.lookupCNIAttachmentID(id)
   310  	mgr.mutex.RUnlock()
   311  	return ep
   312  }
   313  
   314  // LookupIPv4 looks up endpoint by IPv4 address
   315  func (mgr *endpointManager) LookupIPv4(ipv4 string) *endpoint.Endpoint {
   316  	mgr.mutex.RLock()
   317  	ep := mgr.lookupIPv4(ipv4)
   318  	mgr.mutex.RUnlock()
   319  	return ep
   320  }
   321  
   322  // LookupIPv6 looks up endpoint by IPv6 address
   323  func (mgr *endpointManager) LookupIPv6(ipv6 string) *endpoint.Endpoint {
   324  	mgr.mutex.RLock()
   325  	ep := mgr.lookupIPv6(ipv6)
   326  	mgr.mutex.RUnlock()
   327  	return ep
   328  }
   329  
   330  // LookupIP looks up endpoint by IP address
   331  func (mgr *endpointManager) LookupIP(ip netip.Addr) (ep *endpoint.Endpoint) {
   332  	ipStr := ip.Unmap().String()
   333  	mgr.mutex.RLock()
   334  	if ip.Is4() {
   335  		ep = mgr.lookupIPv4(ipStr)
   336  	} else {
   337  		ep = mgr.lookupIPv6(ipStr)
   338  	}
   339  	mgr.mutex.RUnlock()
   340  	return ep
   341  }
   342  
   343  // LookupCEPName looks up an endpoint by its K8s namespace + CEP name
   344  func (mgr *endpointManager) LookupCEPName(namespacedName string) *endpoint.Endpoint {
   345  	mgr.mutex.RLock()
   346  	ep := mgr.lookupCEPNameLocked(namespacedName)
   347  	mgr.mutex.RUnlock()
   348  	return ep
   349  }
   350  
   351  // GetEndpointsByPodName looks up endpoints by namespace + pod name
   352  func (mgr *endpointManager) GetEndpointsByPodName(namespacedName string) []*endpoint.Endpoint {
   353  	mgr.mutex.RLock()
   354  	defer mgr.mutex.RUnlock()
   355  	eps := make([]*endpoint.Endpoint, 0, 1)
   356  	for _, ep := range mgr.endpoints {
   357  		if ep.GetK8sNamespaceAndPodName() == namespacedName {
   358  			eps = append(eps, ep)
   359  		}
   360  	}
   361  
   362  	return eps
   363  }
   364  
   365  // GetEndpointsByContainerID looks up endpoints by container ID
   366  func (mgr *endpointManager) GetEndpointsByContainerID(containerID string) []*endpoint.Endpoint {
   367  	mgr.mutex.RLock()
   368  	defer mgr.mutex.RUnlock()
   369  
   370  	eps := make([]*endpoint.Endpoint, 0, 1)
   371  	for _, ep := range mgr.endpoints {
   372  		if ep.GetContainerID() == containerID {
   373  			eps = append(eps, ep)
   374  		}
   375  	}
   376  	return eps
   377  }
   378  
   379  // ReleaseID releases the ID of the specified endpoint from the endpointManager.
   380  // Returns an error if the ID cannot be released.
   381  func (mgr *endpointManager) ReleaseID(ep *endpoint.Endpoint) error {
   382  	return mgr.epIDAllocator.release(ep.ID)
   383  }
   384  
   385  // unexpose removes the endpoint from the endpointmanager, so subsequent
   386  // lookups will no longer find the endpoint.
   387  func (mgr *endpointManager) unexpose(ep *endpoint.Endpoint) {
   388  	defer ep.Close()
   389  	identifiers := ep.Identifiers()
   390  
   391  	previousState := ep.GetState()
   392  
   393  	mgr.mutex.Lock()
   394  	defer mgr.mutex.Unlock()
   395  
   396  	// This must be done before the ID is released for the endpoint!
   397  	mgr.removeIDLocked(ep.ID)
   398  	mgr.mcastManager.RemoveAddress(ep.IPv6)
   399  
   400  	// We haven't yet allocated the ID for a restoring endpoint, so no
   401  	// need to release it.
   402  	if previousState != endpoint.StateRestoring {
   403  		if err := mgr.ReleaseID(ep); err != nil {
   404  			log.WithError(err).WithFields(logrus.Fields{
   405  				"state":                   previousState,
   406  				logfields.CNIAttachmentID: identifiers[endpointid.CNIAttachmentIdPrefix],
   407  				logfields.CEPName:         identifiers[endpointid.CEPNamePrefix],
   408  			}).Warning("Unable to release endpoint ID")
   409  		}
   410  	}
   411  
   412  	mgr.removeReferencesLocked(identifiers)
   413  }
   414  
   415  // removeEndpoint stops the active handling of events by the specified endpoint,
   416  // and prevents the endpoint from being globally accessible via other packages.
   417  func (mgr *endpointManager) removeEndpoint(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) []error {
   418  	mgr.unexpose(ep)
   419  	result := ep.Delete(conf)
   420  
   421  	mgr.mutex.RLock()
   422  	for s := range mgr.subscribers {
   423  		s.EndpointDeleted(ep, conf)
   424  	}
   425  	mgr.mutex.RUnlock()
   426  
   427  	return result
   428  }
   429  
   430  // RemoveEndpoint stops the active handling of events by the specified endpoint,
   431  // and prevents the endpoint from being globally accessible via other packages.
   432  func (mgr *endpointManager) RemoveEndpoint(ep *endpoint.Endpoint, conf endpoint.DeleteConfig) []error {
   433  	return mgr.deleteEndpoint(ep, conf)
   434  }
   435  
   436  // lookupCiliumID looks up endpoint by endpoint ID
   437  func (mgr *endpointManager) lookupCiliumID(id uint16) *endpoint.Endpoint {
   438  	if ep, ok := mgr.endpoints[id]; ok {
   439  		return ep
   440  	}
   441  	return nil
   442  }
   443  
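        // lookupDockerEndpoint looks up an endpoint by its Docker endpoint ID.
        // The caller must hold mgr.mutex.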
   444  func (mgr *endpointManager) lookupDockerEndpoint(id string) *endpoint.Endpoint {
   445  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, id)]; ok {
   446  		return ep
   447  	}
   448  	return nil
   449  }
   450  
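        // lookupPodNameLocked looks up an endpoint by K8s namespace + pod name.
        // The caller must hold mgr.mutex.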
   451  func (mgr *endpointManager) lookupPodNameLocked(name string) *endpoint.Endpoint {
   452  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, name)]; ok {
   453  		return ep
   454  	}
   455  	return nil
   456  }
   457  
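        // lookupCEPNameLocked looks up an endpoint by K8s namespace + CEP name.
        // The caller must hold mgr.mutex.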
   458  func (mgr *endpointManager) lookupCEPNameLocked(name string) *endpoint.Endpoint {
   459  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.CEPNamePrefix, name)]; ok {
   460  		return ep
   461  	}
   462  	return nil
   463  }
   464  
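        // lookupDockerContainerName looks up an endpoint by its Docker container name.
        // The caller must hold mgr.mutex.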
   465  func (mgr *endpointManager) lookupDockerContainerName(name string) *endpoint.Endpoint {
   466  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, name)]; ok {
   467  		return ep
   468  	}
   469  	return nil
   470  }
   471  
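        // lookupIPv4 looks up an endpoint by its IPv4 address.
        // The caller must hold mgr.mutex.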
   472  func (mgr *endpointManager) lookupIPv4(ipv4 string) *endpoint.Endpoint {
   473  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ipv4)]; ok {
   474  		return ep
   475  	}
   476  	return nil
   477  }
   478  
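        // lookupIPv6 looks up an endpoint by its IPv6 address.
        // The caller must hold mgr.mutex.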
   479  func (mgr *endpointManager) lookupIPv6(ipv6 string) *endpoint.Endpoint {
   480  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ipv6)]; ok {
   481  		return ep
   482  	}
   483  	return nil
   484  }
   485  
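        // lookupContainerID looks up an endpoint by its container ID.
        // The caller must hold mgr.mutex.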
   486  func (mgr *endpointManager) lookupContainerID(id string) *endpoint.Endpoint {
   487  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, id)]; ok {
   488  		return ep
   489  	}
   490  	return nil
   491  }
   492  
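        // lookupCNIAttachmentID looks up an endpoint by its CNI attachment ID.
        // The caller must hold mgr.mutex.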
   493  func (mgr *endpointManager) lookupCNIAttachmentID(id string) *endpoint.Endpoint {
   494  	if ep, ok := mgr.endpointsAux[endpointid.NewID(endpointid.CNIAttachmentIdPrefix, id)]; ok {
   495  		return ep
   496  	}
   497  	return nil
   498  }
   499  
   500  // updateIDReferenceLocked updates the endpoints map in the endpointManager for
   501  // the given Endpoint.
   502  func (mgr *endpointManager) updateIDReferenceLocked(ep *endpoint.Endpoint) {
   503  	if ep == nil {
   504  		return
   505  	}
   506  	mgr.endpoints[ep.ID] = ep
   507  }
   508  
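        // updateReferencesLocked inserts an auxiliary mapping from each of the given
        // identifiers to the endpoint. The caller must hold mgr.mutex for writing.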
   509  func (mgr *endpointManager) updateReferencesLocked(ep *endpoint.Endpoint, identifiers endpointid.Identifiers) {
   510  	for k := range identifiers {
   511  		id := endpointid.NewID(k, identifiers[k])
   512  		mgr.endpointsAux[id] = ep
   513  	}
   514  }
   515  
   516  // UpdateReferences updates the auxiliary lookup mappings so that all of the endpoint's current identifiers resolve to it.
   517  func (mgr *endpointManager) UpdateReferences(ep *endpoint.Endpoint) error {
   518  	mgr.mutex.Lock()
   519  	defer mgr.mutex.Unlock()
   520  
   521  	identifiers := ep.Identifiers()
   522  	mgr.updateReferencesLocked(ep, identifiers)
   523  
   524  	return nil
   525  }
   526  
   527  // removeReferencesLocked removes the mappings from the endpointmanager.
   528  func (mgr *endpointManager) removeReferencesLocked(identifiers endpointid.Identifiers) {
   529  	for prefix := range identifiers {
   530  		id := endpointid.NewID(prefix, identifiers[prefix])
   531  		delete(mgr.endpointsAux, id)
   532  	}
   533  }
   534  
   535  // RegenerateAllEndpoints asks every endpoint known at call time to regenerate
   536  // (via RegenerateIfAlive); endpoints whose current state does not allow
   537  // regeneration are skipped, and endpoints added afterwards are not included.
   538  // Returns a wait group that can be used to know when all the endpoints have
   539  // finished regenerating.
   540  func (mgr *endpointManager) RegenerateAllEndpoints(regenMetadata *regeneration.ExternalRegenerationMetadata) *sync.WaitGroup {
   541  	var wg sync.WaitGroup
   542  
   543  	eps := mgr.GetEndpoints()
   544  	wg.Add(len(eps))
   545  
   546  	// Dereference "reason" field outside of logging statement; see
   547  	// https://github.com/sirupsen/logrus/issues/1003.
   548  	reason := regenMetadata.Reason
   549  	log.WithFields(logrus.Fields{"reason": reason}).Info("regenerating all endpoints")
   550  	for _, ep := range eps {
   551  		go func(ep *endpoint.Endpoint) {
   552  			<-ep.RegenerateIfAlive(regenMetadata)
   553  			wg.Done()
   554  		}(ep)
   555  	}
   556  
   557  	return &wg
   558  }
   559  
   560  // OverrideEndpointOpts applies the given options to all endpoints.
   561  func (mgr *endpointManager) OverrideEndpointOpts(om option.OptionMap) {
   562  	for _, ep := range mgr.GetEndpoints() {
   563  		if _, err := ep.ApplyOpts(om); err != nil && !errors.Is(err, endpoint.ErrNotAlive) {
   564  			log.WithError(err).WithFields(logrus.Fields{
   565  				"ep": ep.GetID(),
   566  			}).Error("Override endpoint options failed")
   567  		}
   568  	}
   569  }
   570  
   571  // HasGlobalCT returns true if the endpoints have a global CT, false otherwise.
   572  func (mgr *endpointManager) HasGlobalCT() bool {
   573  	eps := mgr.GetEndpoints()
   574  	for _, e := range eps {
   575  		if !e.Options.IsEnabled(option.ConntrackLocal) {
   576  			return true
   577  		}
   578  	}
   579  	return false
   580  }
   581  
   582  // GetEndpoints returns a slice of all endpoints present in endpoint manager.
   583  func (mgr *endpointManager) GetEndpoints() []*endpoint.Endpoint {
   584  	mgr.mutex.RLock()
   585  	eps := make([]*endpoint.Endpoint, 0, len(mgr.endpoints))
   586  	for _, ep := range mgr.endpoints {
   587  		eps = append(eps, ep)
   588  	}
   589  	mgr.mutex.RUnlock()
   590  	return eps
   591  }
   592  
   593  // GetPolicyEndpoints returns a set of all endpoints present in the endpoint
   594  // manager, keyed by the policy.Endpoint interface.
   595  func (mgr *endpointManager) GetPolicyEndpoints() map[policy.Endpoint]struct{} {
   596  	mgr.mutex.RLock()
   597  	eps := make(map[policy.Endpoint]struct{}, len(mgr.endpoints))
   598  	for _, ep := range mgr.endpoints {
   599  		eps[ep] = struct{}{}
   600  	}
   601  	mgr.mutex.RUnlock()
   602  	return eps
   603  }
   604  
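        // expose allocates an ID for the endpoint, starts it, and registers it in the
        // manager's lookup maps so that other subsystems can find it. It also adds the
        // endpoint's IPv6 address to the multicast manager and starts CiliumEndpoint
        // (CEP) synchronization for the endpoint.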
   605  func (mgr *endpointManager) expose(ep *endpoint.Endpoint) error {
   606  	newID, err := mgr.allocateID(ep.ID)
   607  	if err != nil {
   608  		return err
   609  	}
   610  
   611  	mgr.mutex.Lock()
   612  	// Get a copy of the identifiers before exposing the endpoint
   613  	identifiers := ep.Identifiers()
   614  	ep.PolicyMapPressureUpdater = mgr.policyMapPressure
   615  	ep.Start(newID)
   616  	mgr.mcastManager.AddAddress(ep.IPv6)
   617  	mgr.updateIDReferenceLocked(ep)
   618  	mgr.updateReferencesLocked(ep, identifiers)
   619  	mgr.mutex.Unlock()
   620  
   621  	ep.InitEndpointHealth(mgr.health)
   622  	mgr.RunK8sCiliumEndpointSync(ep, ep.GetReporter("cep-k8s-sync"))
   623  
   624  	return nil
   625  }
   626  
   627  // RestoreEndpoint exposes the specified endpoint to other subsystems via the
   628  // manager.
   629  func (mgr *endpointManager) RestoreEndpoint(ep *endpoint.Endpoint) error {
   630  	ep.SetDefaultConfiguration()
   631  	err := mgr.expose(ep)
   632  	if err != nil {
   633  		return err
   634  	}
   635  	mgr.mutex.RLock()
   636  	// Unlock the mutex after cloning the subscribers list so as not to block
   637  	// the endpoint restore operation. This could potentially mean that
   638  	// subscribers are called even after they've unsubscribed. However,
   639  	// consumers unsubscribe during the tear-down phase, so the restore
   640  	// callbacks are unlikely to race with unsubscribe calls.
   641  	subscribers := maps.Clone(mgr.subscribers)
   642  	mgr.mutex.RUnlock()
   643  	for s := range subscribers {
   644  		s.EndpointRestored(ep)
   645  	}
   646  
   647  	return nil
   648  }
   649  
   650  // AddEndpoint takes the prepared endpoint object and starts managing it.
   651  func (mgr *endpointManager) AddEndpoint(owner regeneration.Owner, ep *endpoint.Endpoint) (err error) {
   652  	if ep.ID != 0 {
   653  		return fmt.Errorf("Endpoint ID is already set to %d", ep.ID)
   654  	}
   655  
   656  	// Update the logger to re-populate pod fields: when the endpoint and its
   657  	// logger are created, pod details are not yet populated, so subsequent logs
   658  	// would otherwise carry empty pod details such as IP addresses and k8sPodName.
   659  	// This update populates those details in the logger.
   660  	ep.UpdateLogger(map[string]interface{}{
   661  		logfields.ContainerID: ep.GetShortContainerID(),
   662  		logfields.IPv4:        ep.GetIPv4Address(),
   663  		logfields.IPv6:        ep.GetIPv6Address(),
   664  		logfields.K8sPodName:  ep.GetK8sNamespaceAndPodName(),
   665  		logfields.CEPName:     ep.GetK8sNamespaceAndCEPName(),
   666  	})
   667  
   668  	err = mgr.expose(ep)
   669  	if err != nil {
   670  		return err
   671  	}
   672  
   673  	mgr.mutex.RLock()
   674  	for s := range mgr.subscribers {
   675  		s.EndpointCreated(ep)
   676  	}
   677  	mgr.mutex.RUnlock()
   678  
   679  	return nil
   680  }
   681  
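        // AddIngressEndpoint creates the ingress endpoint, starts managing it, and
        // initializes it with the ingress identity labels.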
   682  func (mgr *endpointManager) AddIngressEndpoint(
   683  	ctx context.Context,
   684  	owner regeneration.Owner,
   685  	policyGetter policyRepoGetter,
   686  	ipcache *ipcache.IPCache,
   687  	proxy endpoint.EndpointProxy,
   688  	allocator cache.IdentityAllocator,
   689  ) error {
   690  	ep, err := endpoint.CreateIngressEndpoint(owner, policyGetter, ipcache, proxy, allocator)
   691  	if err != nil {
   692  		return err
   693  	}
   694  
   695  	if err := mgr.AddEndpoint(owner, ep); err != nil {
   696  		return err
   697  	}
   698  
   699  	ep.InitWithIngressLabels(ctx, launchTime)
   700  
   701  	return nil
   702  }
   703  
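        // AddHostEndpoint creates the host endpoint, starts managing it, records its ID
        // on the local node, and initializes its labels from the local node's labels.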
   704  func (mgr *endpointManager) AddHostEndpoint(
   705  	ctx context.Context,
   706  	owner regeneration.Owner,
   707  	policyGetter policyRepoGetter,
   708  	ipcache *ipcache.IPCache,
   709  	proxy endpoint.EndpointProxy,
   710  	allocator cache.IdentityAllocator,
   711  ) error {
   712  	ep, err := endpoint.CreateHostEndpoint(owner, policyGetter, ipcache, proxy, allocator)
   713  	if err != nil {
   714  		return err
   715  	}
   716  
   717  	if err := mgr.AddEndpoint(owner, ep); err != nil {
   718  		return err
   719  	}
   720  
   721  	node.SetEndpointID(ep.GetID())
   722  
   723  	mgr.initHostEndpointLabels(ctx, ep)
   724  
   725  	return nil
   726  }
   727  
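        // policyRepoGetter provides access to the agent's policy repository.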
   728  type policyRepoGetter interface {
   729  	GetPolicyRepository() *policy.Repository
   730  }
   731  
   732  // InitHostEndpointLabels initializes the host endpoint's labels with the
   733  // node's known labels.
   734  func (mgr *endpointManager) InitHostEndpointLabels(ctx context.Context) {
   735  	ep := mgr.GetHostEndpoint()
   736  	if ep == nil {
   737  		log.Error("Attempted to init host endpoint labels but host endpoint not set.")
   738  		return
   739  	}
   740  
   741  	mgr.initHostEndpointLabels(ctx, ep)
   742  }
   743  
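        // initHostEndpointLabels initializes the given host endpoint's labels from the
        // local node's labels and starts observing node label changes to keep them in sync.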
   744  func (mgr *endpointManager) initHostEndpointLabels(ctx context.Context, ep *endpoint.Endpoint) {
   745  	// initHostEndpointLabels is executed by the daemon start hook, and
   746  	// at that point we are guaranteed that the local node has already
   747  	// been initialized, so this Get() operation returns immediately.
   748  	ln, err := mgr.localNodeStore.Get(ctx)
   749  	if err != nil {
   750  		// An error may be returned here only if the context has been canceled,
   751  		// which means that we are already shutting down. In that case, let's
   752  		// just return immediately, as we cannot do anything else.
   753  		return
   754  	}
   755  
   756  	ep.InitWithNodeLabels(ctx, ln.Labels, launchTime)
   757  
   758  	// Start the observer to keep the labels synchronized in case they change
   759  	mgr.startNodeLabelsObserver(ln.Labels)
   760  }
   761  
   762  // WaitForEndpointsAtPolicyRev waits for all endpoints which existed at the time
   763  // this function is called to be at a given policy revision.
   764  // New endpoints appearing while waiting are ignored.
   765  func (mgr *endpointManager) WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error {
   766  	eps := mgr.GetEndpoints()
   767  	for i := range eps {
   768  		select {
   769  		case <-ctx.Done():
   770  			return ctx.Err()
   771  		case <-eps[i].WaitForPolicyRevision(ctx, rev, nil):
   772  			if ctx.Err() != nil {
   773  				return ctx.Err()
   774  			}
   775  		}
   776  	}
   777  	return nil
   778  }
   779  
   780  // CallbackForEndpointsAtPolicyRev registers a callback on all endpoints that
   781  // exist when invoked. It is similar to WaitForEndpointsAtPolicyRev, but each
   782  // endpoint that reaches the desired revision calls 'done' independently.
   783  // The provided callback should not block and should generally be lightweight.
   784  func (mgr *endpointManager) CallbackForEndpointsAtPolicyRev(ctx context.Context, rev uint64, done func(time.Time)) error {
   785  	eps := mgr.GetEndpoints()
   786  	for i := range eps {
   787  		eps[i].WaitForPolicyRevision(ctx, rev, done)
   788  	}
   789  	return nil
   790  }
   791  
   792  // EndpointExists returns whether the endpoint with id exists.
   793  func (mgr *endpointManager) EndpointExists(id uint16) bool {
   794  	return mgr.LookupCiliumID(id) != nil
   795  }
   796  
   797  // GetEndpointNetnsCookieByIP returns the netns cookie of the endpoint with the given IP address, if found.
   798  func (mgr *endpointManager) GetEndpointNetnsCookieByIP(ip netip.Addr) (uint64, error) {
   799  	ep := mgr.LookupIP(ip)
   800  	if ep == nil {
   801  		return 0, fmt.Errorf("endpoint not found by ip %v", ip)
   802  	}
   803  
   804  	return ep.GetEndpointNetnsCookie(), nil
   805  }