github.com/cilium/cilium@v1.16.2/pkg/node/manager/manager.go

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package manager
     5  
     6  import (
     7  	"bufio"
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"io/fs"
    12  	"math/rand/v2"
    13  	"net"
    14  	"net/netip"
    15  	"os"
    16  	"path/filepath"
    17  	"slices"
    18  	"sync"
    19  
    20  	"github.com/cilium/hive/cell"
    21  	"github.com/cilium/workerpool"
    22  	"github.com/google/renameio/v2"
    23  	jsoniter "github.com/json-iterator/go"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  	"github.com/sirupsen/logrus"
    26  	"golang.org/x/time/rate"
    27  
    28  	"github.com/cilium/cilium/pkg/backoff"
    29  	"github.com/cilium/cilium/pkg/controller"
    30  	"github.com/cilium/cilium/pkg/datapath/iptables/ipset"
    31  	datapath "github.com/cilium/cilium/pkg/datapath/types"
    32  	"github.com/cilium/cilium/pkg/identity"
    33  	"github.com/cilium/cilium/pkg/inctimer"
    34  	"github.com/cilium/cilium/pkg/ip"
    35  	"github.com/cilium/cilium/pkg/ipcache"
    36  	ipcacheTypes "github.com/cilium/cilium/pkg/ipcache/types"
    37  	"github.com/cilium/cilium/pkg/labels"
    38  	"github.com/cilium/cilium/pkg/labelsfilter"
    39  	"github.com/cilium/cilium/pkg/lock"
    40  	"github.com/cilium/cilium/pkg/logging/logfields"
    41  	"github.com/cilium/cilium/pkg/metrics"
    42  	"github.com/cilium/cilium/pkg/metrics/metric"
    43  	"github.com/cilium/cilium/pkg/node"
    44  	"github.com/cilium/cilium/pkg/node/addressing"
    45  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    46  	"github.com/cilium/cilium/pkg/option"
    47  	"github.com/cilium/cilium/pkg/source"
    48  	"github.com/cilium/cilium/pkg/time"
    49  	"github.com/cilium/cilium/pkg/trigger"
    50  	"github.com/cilium/cilium/pkg/wireguard/types"
    51  )
    52  
    53  const (
    54  	// The filename for the nodes checkpoint. This is periodically written, and
    55  	// restored on restart. The default path is /run/cilium/state/nodes.json
    56  	nodesFilename = "nodes.json"
     57  	// Minimum amount of time to wait in between writes of the nodes file.
    58  	nodeCheckpointMinInterval = time.Minute
    59  )
    60  
    61  var (
    62  	baseBackgroundSyncInterval = time.Minute
    63  	defaultNodeUpdateInterval  = 10 * time.Second
    64  
    65  	neighborTableRefreshControllerGroup = controller.NewGroup("neighbor-table-refresh")
    66  	neighborTableUpdateControllerGroup  = controller.NewGroup("neighbor-table-update")
    67  )
    68  
    69  const (
    70  	numBackgroundWorkers = 1
    71  )
    72  
    73  type nodeEntry struct {
    74  	// mutex serves two purposes:
    75  	// 1. Serialize any direct access to the node field in this entry.
     76  	// 2. Serialize all calls to the datapath layer for a particular node.
    77  	//
    78  	// See description of Manager.mutex for more details
    79  	//
    80  	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
    81  	// Manager.mutex must *always* be acquired first.
    82  	mutex lock.Mutex
    83  	node  nodeTypes.Node
    84  }
    85  
    86  // IPCache is the set of interactions the node manager performs with the ipcache
    87  type IPCache interface {
    88  	GetMetadataSourceByPrefix(prefix netip.Prefix) source.Source
    89  	UpsertMetadata(prefix netip.Prefix, src source.Source, resource ipcacheTypes.ResourceID, aux ...ipcache.IPMetadata)
    90  	OverrideIdentity(prefix netip.Prefix, identityLabels labels.Labels, src source.Source, resource ipcacheTypes.ResourceID)
    91  	RemoveMetadata(prefix netip.Prefix, resource ipcacheTypes.ResourceID, aux ...ipcache.IPMetadata)
    92  	RemoveIdentityOverride(prefix netip.Prefix, identityLabels labels.Labels, resource ipcacheTypes.ResourceID)
    93  }
    94  
     95  // IPSetFilterFn is a function that allows optionally filtering out the
     96  // insertion of IPSet entries based on node characteristics. The insertion is performed
    97  // if the function returns false, and skipped otherwise.
    98  type IPSetFilterFn func(*nodeTypes.Node) bool
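         //
         // A minimal illustrative sketch (not part of this package; the predicate and
         // the "cluster-1" name below are made-up examples) of a filter that skips
         // ipset insertion for nodes belonging to other clusters:
         //
         //	var skipOtherClusters IPSetFilterFn = func(n *nodeTypes.Node) bool {
         //		// Returning true skips the ipset insertion for this node.
         //		return n.Cluster != "cluster-1" // "cluster-1" is a placeholder
         //	}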
    99  
   100  var _ Notifier = (*manager)(nil)
   101  
   102  // manager is the entity that manages a collection of nodes
   103  type manager struct {
   104  	// mutex is the lock protecting access to the nodes map. The mutex must
   105  	// be held for any access of the nodes map.
   106  	//
   107  	// The manager mutex works together with the entry mutex in the
   108  	// following way to minimize the duration the manager mutex is held:
   109  	//
   110  	// 1. Acquire manager mutex to safely access nodes map and to retrieve
   111  	//    node entry.
   112  	// 2. Acquire mutex of the entry while the manager mutex is still held.
   113  	//    This guarantees that no change to the entry has happened.
   114  	// 3. Release of the manager mutex to unblock changes or reads to other
   115  	//    node entries.
   116  	// 4. Change of entry data or performing of datapath interactions
   117  	// 5. Release of the entry mutex
   118  	//
   119  	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
   120  	// Manager.mutex must *always* be acquired first.
   121  	mutex lock.RWMutex
   122  
    123  	// nodes is the map of all known nodes. Access must be protected via mutex.
   124  	nodes map[nodeTypes.Identity]*nodeEntry
   125  
   126  	// Upon agent startup, this is filled with nodes as read from disk. Used to
   127  	// synthesize node deletion events for nodes which disappeared while we were
   128  	// down.
   129  	restoredNodes map[nodeTypes.Identity]*nodeTypes.Node
   130  
   131  	// nodeHandlersMu protects the nodeHandlers map against concurrent access.
   132  	nodeHandlersMu lock.RWMutex
    133  	// nodeHandlers contains all node handlers subscribed to node
    134  	// events.
   135  	nodeHandlers map[datapath.NodeHandler]struct{}
   136  
   137  	// workerpool manages background workers
   138  	workerpool *workerpool.WorkerPool
   139  
   140  	// metrics to track information about the node manager
   141  	metrics *nodeMetrics
   142  
    143  	// conf is the configuration of the caller passed in via New().
    144  	// This field is immutable after New().
   145  	conf *option.DaemonConfig
   146  
    147  	// ipcache is the set of operations performed against the ipcache
   148  	ipcache IPCache
   149  
    150  	// ipsetMgr manages the ipset configuration for cluster nodes
   151  	ipsetMgr         ipset.Manager
   152  	ipsetInitializer ipset.Initializer
   153  	ipsetFilter      IPSetFilterFn
   154  
   155  	// controllerManager manages the controllers that are launched within the
   156  	// Manager.
   157  	controllerManager *controller.Manager
   158  
   159  	// health reports on the current health status of the node manager module.
   160  	health cell.Health
   161  
   162  	// nodeNeighborQueue tracks node neighbor link updates.
   163  	nodeNeighborQueue queue[nodeQueueEntry]
   164  
   165  	// nodeCheckpointer triggers writing the current set of nodes to disk
   166  	nodeCheckpointer *trigger.Trigger
   167  	checkpointerDone chan struct{} // Closed once the checkpointer is shut down.
   168  
   169  	// Ensure the pruning is only attempted once.
   170  	nodePruneOnce sync.Once
   171  }
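         // The locking protocol described in the manager struct comment, as a short
         // illustrative sketch (id stands in for a nodeTypes.Identity; the same
         // pattern appears in singleBackgroundLoop and NodeUpdated below):
         //
         //	m.mutex.RLock()          // 1. guard access to the nodes map
         //	entry, ok := m.nodes[id] //    and look up the entry
         //	if !ok {
         //		m.mutex.RUnlock()
         //		return
         //	}
         //	entry.mutex.Lock()       // 2. pin the entry while the map lock is held
         //	m.mutex.RUnlock()        // 3. unblock access to other entries
         //	// 4. mutate entry.node or call into the datapath for this node
         //	entry.mutex.Unlock()     // 5. release the entry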
   172  
   173  type nodeQueueEntry struct {
   174  	node    *nodeTypes.Node
   175  	refresh bool
   176  }
   177  
    178  // Enqueue adds a node to a controller-managed queue which sets up the neighbor link.
   179  func (m *manager) Enqueue(n *nodeTypes.Node, refresh bool) {
   180  	if n == nil {
   181  		log.WithFields(logrus.Fields{
   182  			logfields.LogSubsys: "enqueue",
    183  		}).Warn("Skipping nodeNeighbor insert: No node given")
         		return
    184  	}
   185  	m.nodeNeighborQueue.push(&nodeQueueEntry{node: n, refresh: refresh})
   186  }
   187  
   188  // Subscribe subscribes the given node handler to node events.
   189  func (m *manager) Subscribe(nh datapath.NodeHandler) {
   190  	m.nodeHandlersMu.Lock()
   191  	m.nodeHandlers[nh] = struct{}{}
   192  	m.nodeHandlersMu.Unlock()
   193  	// Add all nodes already received by the manager.
   194  	m.mutex.RLock()
   195  	for _, v := range m.nodes {
   196  		v.mutex.Lock()
   197  		if err := nh.NodeAdd(v.node); err != nil {
   198  			log.WithFields(logrus.Fields{
   199  				"handler": nh.Name(),
   200  				"node":    v.node.Name,
   201  			}).WithError(err).Error("Failed applying node handler following initial subscribe. Cilium may have degraded functionality. See error message for more details.")
   202  		}
   203  		v.mutex.Unlock()
   204  	}
   205  	m.mutex.RUnlock()
   206  }
   207  
    208  // Unsubscribe unsubscribes the given node handler from node events.
   209  func (m *manager) Unsubscribe(nh datapath.NodeHandler) {
   210  	m.nodeHandlersMu.Lock()
   211  	delete(m.nodeHandlers, nh)
   212  	m.nodeHandlersMu.Unlock()
   213  }
   214  
   215  // Iter executes the given function in all subscribed node handlers.
   216  func (m *manager) Iter(f func(nh datapath.NodeHandler)) {
   217  	m.nodeHandlersMu.RLock()
   218  	defer m.nodeHandlersMu.RUnlock()
   219  
   220  	for nh := range m.nodeHandlers {
   221  		f(nh)
   222  	}
   223  }
   224  
   225  type nodeMetrics struct {
    226  	// EventsReceived is the prometheus metric to track the number of
    227  	// node events received
   228  	EventsReceived metric.Vec[metric.Counter]
   229  
    230  	// NumNodes is the prometheus metric to track the number of nodes
    231  	// being managed
   232  	NumNodes metric.Gauge
   233  
    234  	// DatapathValidations is the prometheus metric to track the
    235  	// number of datapath node validation calls
   236  	DatapathValidations metric.Counter
   237  }
   238  
    239  // ProcessNodeDeletion, upon node deletion, ensures metrics associated
    240  // with the deleted node are no longer reported.
    241  // Notably, this covers the node connectivity status and latency metrics.
   242  func (*nodeMetrics) ProcessNodeDeletion(clusterName, nodeName string) {
   243  	// Removes all connectivity status associated with the deleted node.
   244  	_ = metrics.NodeConnectivityStatus.DeletePartialMatch(prometheus.Labels{
   245  		metrics.LabelSourceCluster:  clusterName,
   246  		metrics.LabelSourceNodeName: nodeName,
   247  	})
   248  	_ = metrics.NodeConnectivityStatus.DeletePartialMatch(prometheus.Labels{
   249  		metrics.LabelTargetCluster:  clusterName,
   250  		metrics.LabelTargetNodeName: nodeName,
   251  	})
   252  
   253  	// Removes all connectivity latency associated with the deleted node.
   254  	_ = metrics.NodeConnectivityLatency.DeletePartialMatch(prometheus.Labels{
   255  		metrics.LabelSourceCluster:  clusterName,
   256  		metrics.LabelSourceNodeName: nodeName,
   257  	})
   258  	_ = metrics.NodeConnectivityLatency.DeletePartialMatch(prometheus.Labels{
   259  		metrics.LabelTargetCluster:  clusterName,
   260  		metrics.LabelTargetNodeName: nodeName,
   261  	})
   262  }
   263  
   264  func NewNodeMetrics() *nodeMetrics {
   265  	return &nodeMetrics{
   266  		EventsReceived: metric.NewCounterVec(metric.CounterOpts{
   267  			ConfigName: metrics.Namespace + "_" + "nodes_all_events_received_total",
   268  			Namespace:  metrics.Namespace,
   269  			Subsystem:  "nodes",
   270  			Name:       "all_events_received_total",
   271  			Help:       "Number of node events received",
   272  		}, []string{"event_type", "source"}),
   273  
   274  		NumNodes: metric.NewGauge(metric.GaugeOpts{
   275  			ConfigName: metrics.Namespace + "_" + "nodes_all_num",
   276  			Namespace:  metrics.Namespace,
   277  			Subsystem:  "nodes",
   278  			Name:       "all_num",
   279  			Help:       "Number of nodes managed",
   280  		}),
   281  
   282  		DatapathValidations: metric.NewCounter(metric.CounterOpts{
   283  			ConfigName: metrics.Namespace + "_" + "nodes_all_datapath_validations_total",
   284  			Namespace:  metrics.Namespace,
   285  			Subsystem:  "nodes",
   286  			Name:       "all_datapath_validations_total",
   287  			Help:       "Number of validation calls to implement the datapath implementation of a node",
   288  		}),
   289  	}
   290  }
   291  
   292  // New returns a new node manager
   293  func New(c *option.DaemonConfig, ipCache IPCache, ipsetMgr ipset.Manager, ipsetFilter IPSetFilterFn, nodeMetrics *nodeMetrics, health cell.Health) (*manager, error) {
   294  	if ipsetFilter == nil {
   295  		ipsetFilter = func(*nodeTypes.Node) bool { return false }
   296  	}
   297  
   298  	m := &manager{
   299  		nodes:             map[nodeTypes.Identity]*nodeEntry{},
   300  		restoredNodes:     map[nodeTypes.Identity]*nodeTypes.Node{},
   301  		conf:              c,
   302  		controllerManager: controller.NewManager(),
   303  		nodeHandlers:      map[datapath.NodeHandler]struct{}{},
   304  		ipcache:           ipCache,
   305  		ipsetMgr:          ipsetMgr,
   306  		ipsetInitializer:  ipsetMgr.NewInitializer(),
   307  		ipsetFilter:       ipsetFilter,
   308  		metrics:           nodeMetrics,
   309  		health:            health,
   310  	}
   311  
   312  	return m, nil
   313  }
   314  
   315  func (m *manager) Start(cell.HookContext) error {
   316  	m.workerpool = workerpool.New(numBackgroundWorkers)
   317  
   318  	// Ensure that we read a potential nodes file before we overwrite it.
   319  	m.restoreNodeCheckpoint()
   320  	if err := m.initNodeCheckpointer(nodeCheckpointMinInterval); err != nil {
   321  		return fmt.Errorf("failed to initialize node file writer: %w", err)
   322  	}
   323  
   324  	return m.workerpool.Submit("backgroundSync", m.backgroundSync)
   325  }
   326  
   327  // Stop shuts down a node manager
   328  func (m *manager) Stop(cell.HookContext) error {
   329  	if m.workerpool != nil {
   330  		if err := m.workerpool.Close(); err != nil {
   331  			return err
   332  		}
   333  	}
   334  
   335  	m.mutex.Lock()
   336  	defer m.mutex.Unlock()
   337  
   338  	if m.nodeCheckpointer != nil {
   339  		// Using the shutdown func of trigger to checkpoint would block shutdown
   340  		// for up to its MinInterval, which is too long.
   341  		m.nodeCheckpointer.Shutdown()
   342  		close(m.checkpointerDone)
   343  		err := m.checkpoint()
   344  		if err != nil {
   345  			log.WithError(err).Error("Failed to write final node checkpoint.")
   346  		}
   347  		m.nodeCheckpointer = nil
   348  	}
   349  
   350  	return nil
   351  }
   352  
    353  // ClusterSizeDependantInterval returns a time.Duration that is dependent on
    354  // the cluster size, i.e. the number of nodes that have been discovered. This
   355  // can be used to control sync intervals of shared or centralized resources to
   356  // avoid overloading these resources as the cluster grows.
   357  //
   358  // Example sync interval with baseInterval = 1 * time.Minute
   359  //
   360  // nodes | sync interval
   361  // ------+-----------------
   362  // 1     |   41.588830833s
   363  // 2     | 1m05.916737320s
   364  // 4     | 1m36.566274746s
   365  // 8     | 2m11.833474640s
   366  // 16    | 2m49.992800643s
   367  // 32    | 3m29.790453687s
   368  // 64    | 4m10.463236193s
   369  // 128   | 4m51.588744261s
   370  // 256   | 5m32.944565093s
   371  // 512   | 6m14.416550710s
   372  // 1024  | 6m55.946873494s
   373  // 2048  | 7m37.506428894s
   374  // 4096  | 8m19.080616652s
   375  // 8192  | 9m00.662124608s
   376  // 16384 | 9m42.247293667s
   377  func (m *manager) ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration {
   378  	m.mutex.RLock()
   379  	numNodes := len(m.nodes)
   380  	m.mutex.RUnlock()
   381  
   382  	return backoff.ClusterSizeDependantInterval(baseInterval, numNodes)
   383  }
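         // The intervals tabulated above follow a logarithmic curve. As a hedged
         // sketch (inferred from the table values, not copied from the backoff
         // package), the computed interval is approximately:
         //
         //	interval := time.Duration(float64(baseInterval) * math.Log1p(float64(numNodes)))
         //
         // e.g. 60s * ln(1+1) ≈ 41.6s and 60s * ln(1+1024) ≈ 6m56s, matching the table.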
   384  
   385  func (m *manager) backgroundSyncInterval() time.Duration {
   386  	return m.ClusterSizeDependantInterval(baseBackgroundSyncInterval)
   387  }
   388  
    389  // backgroundSync ensures that the local node has a valid datapath in place for
    390  // each node in the cluster. See NodeValidateImplementation().
   391  func (m *manager) backgroundSync(ctx context.Context) error {
   392  	syncTimer, syncTimerDone := inctimer.New()
   393  	defer syncTimerDone()
   394  	for {
   395  		syncInterval := m.backgroundSyncInterval()
   396  		startWaiting := syncTimer.After(syncInterval)
   397  		log.WithField("syncInterval", syncInterval.String()).Debug("Starting new iteration of background sync")
   398  		err := m.singleBackgroundLoop(ctx, syncInterval)
   399  		log.WithField("syncInterval", syncInterval.String()).Debug("Finished iteration of background sync")
   400  
   401  		select {
   402  		case <-ctx.Done():
   403  			return nil
    404  		// This handles the case when we didn't fetch nodes yet (e.g. on bootstrap),
    405  		// but also the case when we have a single node, in which case the
    406  		// rate.Limiter doesn't throttle anything.
   407  		case <-startWaiting:
   408  		}
   409  
   410  		hr := m.health.NewScope("background-sync")
   411  		if err != nil {
   412  			hr.Degraded("Failed to apply node validation", err)
   413  		} else {
   414  			hr.OK("Node validation successful")
   415  		}
   416  	}
   417  }
   418  
   419  func (m *manager) singleBackgroundLoop(ctx context.Context, expectedLoopTime time.Duration) error {
   420  	var errs error
   421  	// get a copy of the node identities to avoid locking the entire manager
   422  	// throughout the process of running the datapath validation.
   423  	nodes := m.GetNodeIdentities()
   424  	limiter := rate.NewLimiter(
   425  		rate.Limit(float64(len(nodes))/float64(expectedLoopTime.Seconds())),
   426  		1, // One token in bucket to amortize for latency of the operation
   427  	)
   428  	for _, nodeIdentity := range nodes {
   429  		if err := limiter.Wait(ctx); err != nil {
   430  			log.WithError(err).Debug("Error while rate limiting backgroundSync updates")
   431  		}
   432  
   433  		select {
   434  		case <-ctx.Done():
   435  			return nil
   436  		default:
   437  		}
    438  		// Retrieve the latest node information in case any event
    439  		// changed the node since the call to GetNodeIdentities()
   440  		m.mutex.RLock()
   441  		entry, ok := m.nodes[nodeIdentity]
   442  		if !ok {
   443  			m.mutex.RUnlock()
   444  			continue
   445  		}
   446  		entry.mutex.Lock()
   447  		m.mutex.RUnlock()
   448  		{
   449  			m.Iter(func(nh datapath.NodeHandler) {
   450  				if err := nh.NodeValidateImplementation(entry.node); err != nil {
   451  					log.WithFields(logrus.Fields{
   452  						"handler": nh.Name(),
   453  						"node":    entry.node.Name,
   454  					}).WithError(err).
   455  						Error("Failed to apply node handler during background sync. Cilium may have degraded functionality. See error message for details.")
   456  					errs = errors.Join(errs, fmt.Errorf("failed while handling %s on node %s: %w", nh.Name(), entry.node.Name, err))
   457  				}
   458  			})
   459  		}
   460  		entry.mutex.Unlock()
   461  
   462  		m.metrics.DatapathValidations.Inc()
   463  	}
   464  	return errs
   465  }
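         // Pacing note for singleBackgroundLoop above: the limiter is configured with
         // a rate of len(nodes)/expectedLoopTime, so validating the whole node set is
         // spread roughly evenly across one sync interval. For example (hypothetical
         // numbers), 600 nodes over a 5-minute loop time gives 600/300s = 2 node
         // validations per second.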
   466  
   467  func (m *manager) restoreNodeCheckpoint() {
   468  	path := filepath.Join(m.conf.StateDir, nodesFilename)
   469  	l := log.WithField(logfields.Path, path)
   470  	f, err := os.Open(path)
   471  	if err != nil {
   472  		if errors.Is(err, fs.ErrNotExist) {
   473  			// If we don't have a file to restore from, there's nothing we can
   474  			// do. This is expected in the upgrade path.
   475  			l.Debugf("No %v file found, cannot replay node deletion events for nodes"+
   476  				" which disappeared during downtime.", nodesFilename)
   477  			return
   478  		}
   479  		l.WithError(err).Error("failed to read node checkpoint file")
   480  		return
   481  	}
   482  
   483  	r := jsoniter.ConfigFastest.NewDecoder(bufio.NewReader(f))
   484  	var nodeCheckpoint []*nodeTypes.Node
   485  	if err := r.Decode(&nodeCheckpoint); err != nil {
   486  		l.WithError(err).Error("failed to decode node checkpoint file")
   487  		return
   488  	}
   489  
   490  	// We can't call NodeUpdated for restored nodes here, as the machinery
   491  	// assumes a fully initialized node manager, which we don't currently have.
   492  	// In addition, we only want to replay NodeDeletions, since k8s provided
   493  	// up-to-date information on all live nodes. We keep the restored nodes
    494  	// separate, let whatever init needs to happen occur, and once we're synced
   495  	// to k8s, compare the restored nodes to the live ones.
   496  	for _, n := range nodeCheckpoint {
   497  		n.Source = source.Restored
   498  		m.restoredNodes[n.Identity()] = n
   499  	}
   500  }
   501  
   502  // initNodeCheckpointer sets up the trigger for writing nodes to disk.
   503  func (m *manager) initNodeCheckpointer(minInterval time.Duration) error {
   504  	var err error
   505  	health := m.health.NewScope("node-checkpoint-writer")
   506  	m.checkpointerDone = make(chan struct{})
   507  
   508  	m.nodeCheckpointer, err = trigger.NewTrigger(trigger.Parameters{
   509  		Name:        "node-checkpoint-trigger",
   510  		MinInterval: minInterval, // To avoid rapid repetition (e.g. during startup).
   511  		TriggerFunc: func(reasons []string) {
   512  			m.mutex.RLock()
   513  			select {
   514  			// The trigger package does not check whether the trigger is shut
   515  			// down already after sleeping to honor the MinInterval. Hence, we
   516  			// do so ourselves.
   517  			case <-m.checkpointerDone:
   518  				return
   519  			default:
   520  			}
   521  			err := m.checkpoint()
   522  			m.mutex.RUnlock()
   523  
   524  			if err != nil {
   525  				log.WithFields(logrus.Fields{
   526  					logfields.Reason: reasons,
   527  				}).WithError(err).Error("could not write node checkpoint")
   528  				health.Degraded("failed to write node checkpoint", err)
   529  			} else {
   530  				health.OK("node checkpoint written")
   531  			}
   532  		},
   533  	})
   534  	return err
   535  }
   536  
   537  // checkpoint writes all nodes to disk. Assumes the manager is read locked.
   538  // Don't call this directly, use the nodeCheckpointer trigger.
   539  func (m *manager) checkpoint() error {
   540  	stateDir := m.conf.StateDir
   541  	nodesPath := filepath.Join(stateDir, nodesFilename)
   542  	log.WithFields(logrus.Fields{
   543  		logfields.Path: nodesPath,
   544  	}).Debug("writing node checkpoint to disk")
   545  
   546  	// Write new contents to a temporary file which will be atomically renamed to the
   547  	// real file at the end of this function to avoid data corruption if we crash.
   548  	f, err := renameio.TempFile(stateDir, nodesPath)
   549  	if err != nil {
   550  		return fmt.Errorf("failed to open temporary file: %w", err)
   551  	}
   552  	defer f.Cleanup()
   553  
   554  	bw := bufio.NewWriter(f)
   555  	w := jsoniter.ConfigFastest.NewEncoder(bw)
   556  	ns := make([]nodeTypes.Node, 0, len(m.nodes))
   557  	for _, n := range m.nodes {
   558  		ns = append(ns, n.node)
   559  	}
   560  	if err := w.Encode(ns); err != nil {
   561  		return fmt.Errorf("failed to encode node checkpoint: %w", err)
   562  	}
   563  	if err := bw.Flush(); err != nil {
   564  		return fmt.Errorf("failed to flush node checkpoint writer: %w", err)
   565  	}
   566  
   567  	return f.CloseAtomicallyReplace()
   568  }
   569  
   570  func (m *manager) nodeAddressHasTunnelIP(address nodeTypes.Address) bool {
   571  	// If the host firewall is enabled, all traffic to remote nodes must go
   572  	// through the tunnel to preserve the source identity as part of the
    573  	// encapsulation. In the encryption case we also want to use the vxlan device
    574  	// to create symmetric traffic when sending nodeIP->pod and pod->nodeIP.
   575  	return address.Type == addressing.NodeCiliumInternalIP || m.conf.NodeEncryptionEnabled() ||
   576  		m.conf.EnableHostFirewall || m.conf.JoinCluster
   577  }
   578  
   579  func (m *manager) nodeAddressHasEncryptKey() bool {
    580  	// If we are doing encryption, but not node-based encryption, then do not
    581  	// add a key to the node IPs so that we avoid a trip through the stack and an
    582  	// attempt to encrypt something we know does not have an encryption policy
    583  	// installed in the datapath. By setting key=0 and the tunnelIP, traffic will
    584  	// be sent unencrypted over the overlay device.
   585  	return m.conf.NodeEncryptionEnabled() &&
   586  		// Also ignore any remote node's key if the local node opted to not perform
   587  		// node-to-node encryption
   588  		!node.GetOptOutNodeEncryption()
   589  }
   590  
   591  // endpointEncryptionKey returns the encryption key index to use for the health
   592  // and ingress endpoints of a node. This is needed for WireGuard where the
   593  // node's EncryptionKey and the endpoint's EncryptionKey are not the same if
   594  // a node has opted out of node-to-node encryption by zeroing n.EncryptionKey.
   595  // With WireGuard, we always want to encrypt pod-to-pod traffic, thus we return
   596  // a static non-zero encrypt key here.
   597  // With IPSec (or no encryption), the node's encryption key index and the
   598  // encryption key of the endpoint on that node are the same.
   599  func (m *manager) endpointEncryptionKey(n *nodeTypes.Node) ipcacheTypes.EncryptKey {
   600  	if m.conf.EnableWireguard {
   601  		return ipcacheTypes.EncryptKey(types.StaticEncryptKey)
   602  	}
   603  
   604  	return ipcacheTypes.EncryptKey(n.EncryptionKey)
   605  }
   606  
   607  func (m *manager) nodeIdentityLabels(n nodeTypes.Node) (nodeLabels labels.Labels, hasOverride bool) {
   608  	nodeLabels = labels.NewFrom(labels.LabelRemoteNode)
   609  	if n.IsLocal() {
   610  		nodeLabels = labels.NewFrom(labels.LabelHost)
   611  		if m.conf.PolicyCIDRMatchesNodes() {
   612  			for _, address := range n.IPAddresses {
   613  				addr, ok := ip.AddrFromIP(address.IP)
   614  				if ok {
   615  					bitLen := addr.BitLen()
   616  					if m.conf.EnableIPv4 && bitLen == net.IPv4len*8 ||
   617  						m.conf.EnableIPv6 && bitLen == net.IPv6len*8 {
   618  						prefix, err := addr.Prefix(bitLen)
   619  						if err == nil {
   620  							cidrLabels := labels.GetCIDRLabels(prefix)
   621  							nodeLabels.MergeLabels(cidrLabels)
   622  						}
   623  					}
   624  				}
   625  			}
   626  		}
   627  	} else if !identity.NumericIdentity(n.NodeIdentity).IsReservedIdentity() {
   628  		// This needs to match clustermesh-apiserver's VMManager.AllocateNodeIdentity
   629  		nodeLabels = labels.Map2Labels(n.Labels, labels.LabelSourceK8s)
   630  		hasOverride = true
   631  	} else if !n.IsLocal() && option.Config.PerNodeLabelsEnabled() {
   632  		lbls := labels.Map2Labels(n.Labels, labels.LabelSourceNode)
   633  		filteredLbls, _ := labelsfilter.FilterNodeLabels(lbls)
   634  		nodeLabels.MergeLabels(filteredLbls)
   635  	}
   636  
   637  	return nodeLabels, hasOverride
   638  }
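         // To illustrate the branches above (the label keys below are illustrative,
         // derived from the reserved/k8s/node label sources used in this function):
         //
         //	local node:                      reserved:host, plus cidr:* labels when
         //	                                 PolicyCIDRMatchesNodes is enabled
         //	remote node, non-reserved ident: k8s:* labels copied from n.Labels,
         //	                                 hasOverride=true
         //	remote node, per-node labels on: reserved:remote-node merged with the
         //	                                 filtered node:* labels
         //	any other remote node:           reserved:remote-node only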
   639  
   640  // NodeUpdated is called after the information of a node has been updated. The
   641  // node in the manager is added or updated if the source is allowed to update
   642  // the node. If an update or addition has occurred, NodeUpdate() of the datapath
   643  // interface is invoked.
   644  func (m *manager) NodeUpdated(n nodeTypes.Node) {
   645  	log.WithFields(logrus.Fields{
   646  		logfields.ClusterName: n.Cluster,
   647  		logfields.NodeName:    n.Name,
   648  		logfields.SPI:         n.EncryptionKey,
   649  	}).Info("Node updated")
   650  	if log.Logger.IsLevelEnabled(logrus.DebugLevel) {
   651  		log.WithField(logfields.Node, n.LogRepr()).Debugf("Received node update event from %s", n.Source)
   652  	}
   653  
   654  	nodeIdentifier := n.Identity()
   655  	dpUpdate := true
   656  	var nodeIP netip.Addr
   657  	if nIP := n.GetNodeIP(false); nIP != nil {
   658  		// GH-24829: Support IPv6-only nodes.
   659  
   660  		// Skip returning the error here because at this level, we assume that
   661  		// the IP is valid as long as it's coming from nodeTypes.Node. This
   662  		// object is created either from the node discovery (K8s) or from an
   663  		// event from the kvstore.
   664  		nodeIP, _ = ip.AddrFromIP(nIP)
   665  	}
   666  
   667  	resource := ipcacheTypes.NewResourceID(ipcacheTypes.ResourceKindNode, "", n.Name)
   668  	nodeLabels, nodeIdentityOverride := m.nodeIdentityLabels(n)
   669  
   670  	var ipsetEntries []netip.Prefix
   671  	var nodeIPsAdded, healthIPsAdded, ingressIPsAdded []netip.Prefix
   672  
   673  	for _, address := range n.IPAddresses {
   674  		prefix := ip.IPToNetPrefix(address.IP)
   675  
   676  		if address.Type == addressing.NodeInternalIP && !m.ipsetFilter(&n) {
   677  			ipsetEntries = append(ipsetEntries, prefix)
   678  		}
   679  
   680  		var tunnelIP netip.Addr
   681  		if m.nodeAddressHasTunnelIP(address) {
   682  			tunnelIP = nodeIP
   683  		}
   684  
   685  		var key uint8
   686  		if m.nodeAddressHasEncryptKey() {
   687  			key = n.EncryptionKey
   688  		}
   689  
   690  		// We expect the node manager to have a source of either Kubernetes,
   691  		// CustomResource, or KVStore. Prioritize the KVStore source over the
   692  		// rest as it is the strongest source, i.e. only trigger datapath
   693  		// updates if the information we receive takes priority.
   694  		//
   695  		// There are two exceptions to the rules above:
   696  		// * kube-apiserver entries - in that case,
   697  		//   we still want to inform subscribers about changes in auxiliary
   698  		//   data such as for example the health endpoint.
   699  		// * CiliumInternal IP addresses that match configured local router IP.
   700  		//   In that case, we still want to inform subscribers about a new node
   701  		//   even when IP addresses may seem repeated across the nodes.
   702  		existing := m.ipcache.GetMetadataSourceByPrefix(prefix)
   703  		overwrite := source.AllowOverwrite(existing, n.Source)
   704  		if !overwrite && existing != source.KubeAPIServer &&
   705  			!(address.Type == addressing.NodeCiliumInternalIP && m.conf.IsLocalRouterIP(address.ToString())) {
   706  			dpUpdate = false
   707  		}
   708  
   709  		lbls := nodeLabels
   710  		// Add the CIDR labels for this node, if we allow selecting nodes by CIDR
   711  		if m.conf.PolicyCIDRMatchesNodes() {
   712  			lbls = labels.NewFrom(nodeLabels)
   713  			lbls.MergeLabels(labels.GetCIDRLabels(prefix))
   714  		}
   715  
   716  		// Always associate the prefix with metadata, even though this may not
   717  		// end up in an ipcache entry.
   718  		m.ipcache.UpsertMetadata(prefix, n.Source, resource,
   719  			lbls,
   720  			ipcacheTypes.TunnelPeer{Addr: tunnelIP},
   721  			ipcacheTypes.EncryptKey(key))
   722  		if nodeIdentityOverride {
   723  			m.ipcache.OverrideIdentity(prefix, nodeLabels, n.Source, resource)
   724  		}
   725  		nodeIPsAdded = append(nodeIPsAdded, prefix)
   726  	}
   727  
   728  	var v4Addrs, v6Addrs []netip.Addr
   729  	for _, prefix := range ipsetEntries {
   730  		addr := prefix.Addr()
   731  		if addr.Is6() {
   732  			v6Addrs = append(v6Addrs, addr)
   733  		} else {
   734  			v4Addrs = append(v4Addrs, addr)
   735  		}
   736  	}
   737  	m.ipsetMgr.AddToIPSet(ipset.CiliumNodeIPSetV4, ipset.INetFamily, v4Addrs...)
   738  	m.ipsetMgr.AddToIPSet(ipset.CiliumNodeIPSetV6, ipset.INet6Family, v6Addrs...)
   739  
   740  	for _, address := range []net.IP{n.IPv4HealthIP, n.IPv6HealthIP} {
   741  		healthIP := ip.IPToNetPrefix(address)
   742  		if !healthIP.IsValid() {
   743  			continue
   744  		}
   745  		if !source.AllowOverwrite(m.ipcache.GetMetadataSourceByPrefix(healthIP), n.Source) {
   746  			dpUpdate = false
   747  		}
   748  
   749  		m.ipcache.UpsertMetadata(healthIP, n.Source, resource,
   750  			labels.LabelHealth,
   751  			ipcacheTypes.TunnelPeer{Addr: nodeIP},
   752  			m.endpointEncryptionKey(&n))
   753  		healthIPsAdded = append(healthIPsAdded, healthIP)
   754  	}
   755  
   756  	for _, address := range []net.IP{n.IPv4IngressIP, n.IPv6IngressIP} {
   757  		ingressIP := ip.IPToNetPrefix(address)
   758  		if !ingressIP.IsValid() {
   759  			continue
   760  		}
   761  		if !source.AllowOverwrite(m.ipcache.GetMetadataSourceByPrefix(ingressIP), n.Source) {
   762  			dpUpdate = false
   763  		}
   764  
   765  		m.ipcache.UpsertMetadata(ingressIP, n.Source, resource,
   766  			labels.LabelIngress,
   767  			ipcacheTypes.TunnelPeer{Addr: nodeIP},
   768  			m.endpointEncryptionKey(&n))
   769  		ingressIPsAdded = append(ingressIPsAdded, ingressIP)
   770  	}
   771  
   772  	m.mutex.Lock()
   773  	entry, oldNodeExists := m.nodes[nodeIdentifier]
   774  	if oldNodeExists {
   775  		m.metrics.EventsReceived.WithLabelValues("update", string(n.Source)).Inc()
   776  
   777  		if !source.AllowOverwrite(entry.node.Source, n.Source) {
   778  			// Done; skip node-handler updates and label injection
   779  			// triggers below. Includes case where the local host
   780  			// was discovered locally and then is subsequently
   781  			// updated by the k8s watcher.
   782  			m.mutex.Unlock()
   783  			return
   784  		}
   785  
   786  		entry.mutex.Lock()
   787  		m.mutex.Unlock()
   788  		oldNode := entry.node
   789  		entry.node = n
   790  		if dpUpdate {
   791  			var errs error
   792  			m.Iter(func(nh datapath.NodeHandler) {
   793  				if err := nh.NodeUpdate(oldNode, entry.node); err != nil {
   794  					log.WithFields(logrus.Fields{
   795  						"handler": nh.Name(),
   796  						"node":    entry.node.Name,
   797  					}).WithError(err).
    798  						Error("Failed to handle node update event while applying handler. Cilium may have degraded functionality. See error message for details.")
   799  					errs = errors.Join(errs, err)
   800  				}
   801  			})
   802  
   803  			hr := m.health.NewScope("nodes-update")
   804  			if errs != nil {
   805  				hr.Degraded("Failed to update nodes", errs)
   806  			} else {
   807  				hr.OK("Node updates successful")
   808  			}
   809  		}
   810  
   811  		m.removeNodeFromIPCache(oldNode, resource, ipsetEntries, nodeIPsAdded, healthIPsAdded, ingressIPsAdded)
   812  
   813  		entry.mutex.Unlock()
   814  	} else {
   815  		m.metrics.EventsReceived.WithLabelValues("add", string(n.Source)).Inc()
   816  		m.metrics.NumNodes.Inc()
   817  
   818  		entry = &nodeEntry{node: n}
   819  		entry.mutex.Lock()
   820  		m.nodes[nodeIdentifier] = entry
   821  		m.mutex.Unlock()
   822  		var errs error
   823  		if dpUpdate {
   824  			m.Iter(func(nh datapath.NodeHandler) {
   825  				if err := nh.NodeAdd(entry.node); err != nil {
   826  					log.WithFields(logrus.Fields{
   827  						"node":    entry.node.Name,
   828  						"handler": nh.Name(),
   829  					}).WithError(err).
    830  						Error("Failed to handle node add event while applying handler. Cilium may have degraded functionality. See error message for details.")
   831  					errs = errors.Join(errs, err)
   832  				}
   833  			})
   834  		}
   835  		entry.mutex.Unlock()
   836  		hr := m.health.NewScope("nodes-add")
   837  		if errs != nil {
   838  			hr.Degraded("Failed to add nodes", errs)
   839  		} else {
   840  			hr.OK("Node adds successful")
   841  		}
   842  
   843  	}
   844  
   845  	if m.nodeCheckpointer != nil {
   846  		m.nodeCheckpointer.TriggerWithReason("NodeUpdate")
   847  	}
   848  }
   849  
   850  // removeNodeFromIPCache removes all addresses associated with oldNode from the IPCache,
   851  // unless they are present in the nodeIPsAdded, healthIPsAdded, ingressIPsAdded lists.
   852  // Removes ipset entry associated with oldNode if it is not present in ipsetEntries.
   853  //
   854  // The removal logic in this function should mirror the upsert logic in NodeUpdated.
   855  func (m *manager) removeNodeFromIPCache(oldNode nodeTypes.Node, resource ipcacheTypes.ResourceID,
   856  	ipsetEntries, nodeIPsAdded, healthIPsAdded, ingressIPsAdded []netip.Prefix) {
   857  
   858  	var oldNodeIP netip.Addr
   859  	if nIP := oldNode.GetNodeIP(false); nIP != nil {
   860  		// See comment in NodeUpdated().
   861  		oldNodeIP, _ = ip.AddrFromIP(nIP)
   862  	}
   863  	oldNodeLabels, oldNodeIdentityOverride := m.nodeIdentityLabels(oldNode)
   864  
   865  	// Delete the old node IP addresses if they have changed in this node.
   866  	var v4Addrs, v6Addrs []netip.Addr
   867  	for _, address := range oldNode.IPAddresses {
   868  		oldPrefix := ip.IPToNetPrefix(address.IP)
   869  		if slices.Contains(nodeIPsAdded, oldPrefix) {
   870  			continue
   871  		}
   872  
   873  		if address.Type == addressing.NodeInternalIP && !slices.Contains(ipsetEntries, oldPrefix) {
   874  			addr, ok := ip.AddrFromIP(address.IP)
   875  			if !ok {
   876  				log.WithField(logfields.IPAddr, address.IP).Error("unable to convert to netip.Addr")
   877  				continue
   878  			}
   879  			if addr.Is6() {
   880  				v6Addrs = append(v6Addrs, addr)
   881  			} else {
   882  				v4Addrs = append(v4Addrs, addr)
   883  			}
   884  		}
   885  
   886  		var oldTunnelIP netip.Addr
   887  		if m.nodeAddressHasTunnelIP(address) {
   888  			oldTunnelIP = oldNodeIP
   889  		}
   890  
   891  		var oldKey uint8
   892  		if m.nodeAddressHasEncryptKey() {
   893  			oldKey = oldNode.EncryptionKey
   894  		}
   895  
   896  		m.ipcache.RemoveMetadata(oldPrefix, resource,
   897  			oldNodeLabels,
   898  			ipcacheTypes.TunnelPeer{Addr: oldTunnelIP},
   899  			ipcacheTypes.EncryptKey(oldKey))
   900  		if oldNodeIdentityOverride {
   901  			m.ipcache.RemoveIdentityOverride(oldPrefix, oldNodeLabels, resource)
   902  		}
   903  	}
   904  
   905  	m.ipsetMgr.RemoveFromIPSet(ipset.CiliumNodeIPSetV4, v4Addrs...)
   906  	m.ipsetMgr.RemoveFromIPSet(ipset.CiliumNodeIPSetV6, v6Addrs...)
   907  
   908  	// Delete the old health IP addresses if they have changed in this node.
   909  	for _, address := range []net.IP{oldNode.IPv4HealthIP, oldNode.IPv6HealthIP} {
   910  		healthIP := ip.IPToNetPrefix(address)
   911  		if !healthIP.IsValid() || slices.Contains(healthIPsAdded, healthIP) {
   912  			continue
   913  		}
   914  
   915  		m.ipcache.RemoveMetadata(healthIP, resource,
   916  			labels.LabelHealth,
   917  			ipcacheTypes.TunnelPeer{Addr: oldNodeIP},
   918  			m.endpointEncryptionKey(&oldNode))
   919  	}
   920  
   921  	// Delete the old ingress IP addresses if they have changed in this node.
   922  	for _, address := range []net.IP{oldNode.IPv4IngressIP, oldNode.IPv6IngressIP} {
   923  		ingressIP := ip.IPToNetPrefix(address)
   924  		if !ingressIP.IsValid() || slices.Contains(ingressIPsAdded, ingressIP) {
   925  			continue
   926  		}
   927  
   928  		m.ipcache.RemoveMetadata(ingressIP, resource,
   929  			labels.LabelIngress,
   930  			ipcacheTypes.TunnelPeer{Addr: oldNodeIP},
   931  			m.endpointEncryptionKey(&oldNode))
   932  	}
   933  }
   934  
   935  // NodeDeleted is called after a node has been deleted. It removes the node
    936  // from the manager if the node is still owned by the source from which the
    937  // event originates. If the node was removed, NodeDelete() of the datapath
    938  // interface is invoked.
   939  func (m *manager) NodeDeleted(n nodeTypes.Node) {
   940  	log.WithFields(logrus.Fields{
   941  		logfields.ClusterName: n.Cluster,
   942  		logfields.NodeName:    n.Name,
   943  	}).Info("Node deleted")
   944  	if log.Logger.IsLevelEnabled(logrus.DebugLevel) {
   945  		log.Debugf("Received node delete event from %s", n.Source)
   946  	}
   947  
   948  	m.metrics.EventsReceived.WithLabelValues("delete", string(n.Source)).Inc()
   949  
   950  	nodeIdentifier := n.Identity()
   951  
   952  	var (
   953  		entry         *nodeEntry
   954  		oldNodeExists bool
   955  	)
   956  
   957  	m.mutex.Lock()
   958  	// If the node is restored from disk, it doesn't exist in the bookkeeping,
   959  	// but we need to synthesize a deletion event for downstream.
   960  	if n.Source == source.Restored {
   961  		entry = &nodeEntry{
   962  			node: n,
   963  		}
   964  	} else {
   965  		entry, oldNodeExists = m.nodes[nodeIdentifier]
   966  		if !oldNodeExists {
   967  			m.mutex.Unlock()
   968  			return
   969  		}
   970  	}
   971  
    972  	// If the source is Kubernetes and the node is the node we are running on,
   973  	// Kubernetes is giving us a hint it is about to delete our node. Close down
   974  	// the agent gracefully in this case.
   975  	if n.Source != entry.node.Source {
   976  		m.mutex.Unlock()
   977  		if n.IsLocal() && n.Source == source.Kubernetes {
   978  			log.Debugf("Kubernetes is deleting local node, close manager")
   979  			m.Stop(context.Background())
   980  		} else {
   981  			log.Debugf("Ignoring delete event of node %s from source %s. The node is owned by %s",
   982  				n.Name, n.Source, entry.node.Source)
   983  		}
   984  		return
   985  	}
   986  
   987  	// The ipcache is recreated from scratch on startup, no need to prune restored stale nodes.
   988  	if n.Source != source.Restored {
   989  		resource := ipcacheTypes.NewResourceID(ipcacheTypes.ResourceKindNode, "", n.Name)
   990  		m.removeNodeFromIPCache(entry.node, resource, nil, nil, nil, nil)
   991  	}
   992  
   993  	m.metrics.NumNodes.Dec()
   994  	m.metrics.ProcessNodeDeletion(n.Cluster, n.Name)
   995  
   996  	entry.mutex.Lock()
   997  	delete(m.nodes, nodeIdentifier)
   998  	if m.nodeCheckpointer != nil {
   999  		m.nodeCheckpointer.TriggerWithReason("NodeDeleted")
  1000  	}
  1001  	m.mutex.Unlock()
  1002  	var errs error
  1003  	m.Iter(func(nh datapath.NodeHandler) {
  1004  		if err := nh.NodeDelete(n); err != nil {
   1005  			// For now we log the error and continue. Eventually we will want to incorporate
   1006  			// this into the node manager's health status.
   1007  			// However, this is a bit tricky, as leftover node deletes are not retried, so this will
   1008  			// need to be accompanied by some kind of retry mechanism.
  1009  			log.WithFields(logrus.Fields{
  1010  				"handler": nh.Name(),
  1011  				"node":    n.Name,
   1012  			}).WithError(err).Error("Failed to handle node delete event while applying handler. Cilium may have degraded functionality.")
  1013  			errs = errors.Join(errs, err)
  1014  		}
  1015  	})
  1016  	entry.mutex.Unlock()
  1017  
  1018  	hr := m.health.NewScope("nodes-delete")
  1019  	if errs != nil {
  1020  		hr.Degraded("Failed to delete nodes", errs)
  1021  	} else {
  1022  		hr.OK("Node deletions successful")
  1023  	}
  1024  }
  1025  
  1026  // NodeSync signals the manager that the initial nodes listing (either from k8s
  1027  // or kvstore) has been completed. This allows the manager to initiate the
  1028  // deletion of possible stale nodes.
  1029  func (m *manager) NodeSync() {
  1030  	m.ipsetInitializer.InitDone()
  1031  
  1032  	// Due to the complexity around kvstore vs k8s as node sources, it may occur
  1033  	// that both sources call NodeSync at some point. Ensure we only run this
  1034  	// pruning operation once.
  1035  	m.nodePruneOnce.Do(func() {
  1036  		m.pruneNodes(false)
  1037  	})
  1038  }
  1039  
  1040  func (m *manager) MeshNodeSync() {
  1041  	m.pruneNodes(true)
  1042  }
  1043  
  1044  func (m *manager) pruneNodes(includeMeshed bool) {
  1045  	m.mutex.Lock()
  1046  	if m.restoredNodes == nil || len(m.restoredNodes) == 0 {
  1047  		m.mutex.Unlock()
  1048  		return
  1049  	}
  1050  	// Live nodes should not be pruned.
  1051  	for id := range m.nodes {
  1052  		delete(m.restoredNodes, id)
  1053  	}
  1054  
  1055  	if len(m.restoredNodes) > 0 {
  1056  		log.WithFields(logrus.Fields{
  1057  			"stale-nodes": m.restoredNodes,
  1058  		}).Info("Deleting stale nodes")
  1059  	}
  1060  	m.mutex.Unlock()
  1061  
  1062  	// Delete nodes now considered stale. Can't hold the mutex as
  1063  	// NodeDeleted also acquires it.
  1064  	for id, n := range m.restoredNodes {
  1065  		if n.Cluster == m.conf.ClusterName || includeMeshed {
  1066  			m.NodeDeleted(*n)
  1067  			delete(m.restoredNodes, id)
  1068  		}
  1069  	}
  1070  }
  1071  
   1072  // GetNodeIdentities returns a list of all node identities stored in the node
   1073  // manager.
  1074  func (m *manager) GetNodeIdentities() []nodeTypes.Identity {
  1075  	m.mutex.RLock()
  1076  	defer m.mutex.RUnlock()
  1077  
  1078  	nodes := make([]nodeTypes.Identity, 0, len(m.nodes))
  1079  	for nodeIdentity := range m.nodes {
  1080  		nodes = append(nodes, nodeIdentity)
  1081  	}
  1082  
  1083  	return nodes
  1084  }
  1085  
  1086  // GetNodes returns a copy of all of the nodes as a map from Identity to Node.
  1087  func (m *manager) GetNodes() map[nodeTypes.Identity]nodeTypes.Node {
  1088  	m.mutex.RLock()
  1089  	defer m.mutex.RUnlock()
  1090  
  1091  	nodes := make(map[nodeTypes.Identity]nodeTypes.Node, len(m.nodes))
  1092  	for nodeIdentity, entry := range m.nodes {
  1093  		entry.mutex.Lock()
  1094  		nodes[nodeIdentity] = entry.node
  1095  		entry.mutex.Unlock()
  1096  	}
  1097  
  1098  	return nodes
  1099  }
  1100  
   1101  // StartNodeNeighborLinkUpdater manages node neighbor link sync.
   1102  // This provides a central location for all node neighbor link updates.
   1103  // Under proper conditions, a publisher enqueues the node which requires a link update.
   1104  // This controller is agnostic of the condition under which the links must be established, thus
   1105  // that responsibility lies with the publishers.
  1106  // This controller also provides for module health to be reported in a single central location.
  1107  func (m *manager) StartNodeNeighborLinkUpdater(nh datapath.NodeNeighbors) {
  1108  	sc := m.health.NewScope("neighbor-link-updater")
  1109  	controller.NewManager().UpdateController(
  1110  		"node-neighbor-link-updater",
  1111  		controller.ControllerParams{
  1112  			Group: neighborTableUpdateControllerGroup,
  1113  			DoFunc: func(ctx context.Context) error {
  1114  				var errs error
  1115  				if m.nodeNeighborQueue.isEmpty() {
  1116  					return nil
  1117  				}
  1118  				for {
  1119  					e, ok := m.nodeNeighborQueue.pop()
  1120  					if !ok {
  1121  						break
  1122  					} else if e == nil || e.node == nil {
  1123  						errs = errors.Join(errs, fmt.Errorf("invalid node spec found in queue: %#v", e))
  1124  						break
  1125  					}
  1126  
  1127  					log.Debugf("Refreshing node neighbor link for %s", e.node.Name)
  1128  					hr := sc.NewScope(e.node.Name)
  1129  					if errs = errors.Join(errs, nh.NodeNeighborRefresh(ctx, *e.node, e.refresh)); errs != nil {
  1130  						hr.Degraded("Failed node neighbor link update", errs)
  1131  					} else {
  1132  						hr.OK("Node neighbor link update successful")
  1133  					}
  1134  				}
  1135  				return errs
  1136  			},
  1137  			RunInterval: defaultNodeUpdateInterval,
  1138  		},
  1139  	)
  1140  }
  1141  
   1142  // StartNeighborRefresh spawns a controller which refreshes the neighbor table
   1143  // by forcing a node neighbor refresh periodically based on the arping settings.
  1144  func (m *manager) StartNeighborRefresh(nh datapath.NodeNeighbors) {
  1145  	ctx, cancel := context.WithCancel(context.Background())
  1146  	controller.NewManager().UpdateController(
  1147  		"neighbor-table-refresh",
  1148  		controller.ControllerParams{
  1149  			Group: neighborTableRefreshControllerGroup,
  1150  			DoFunc: func(controllerCtx context.Context) error {
   1151  				// Cancel goroutines from the previous controller run
  1152  				cancel()
  1153  				ctx, cancel = context.WithCancel(controllerCtx)
  1154  				m.mutex.RLock()
  1155  				defer m.mutex.RUnlock()
  1156  				for _, entry := range m.nodes {
  1157  					entry.mutex.Lock()
  1158  					entryNode := entry.node
  1159  					entry.mutex.Unlock()
  1160  					if entryNode.IsLocal() {
  1161  						continue
  1162  					}
  1163  					go func(ctx context.Context, e *nodeTypes.Node) {
   1164  						// TODO: Should this be moved to dequeue instead?
   1165  						// To avoid flooding the network with arping requests
   1166  						// at the same time, spread them over the
   1167  						// [0, ARPPingRefreshPeriod/2) period.
  1168  						n := rand.Int64N(int64(m.conf.ARPPingRefreshPeriod / 2))
  1169  						time.Sleep(time.Duration(n))
  1170  						m.Enqueue(e, false)
  1171  					}(ctx, &entryNode)
  1172  				}
  1173  				return nil
  1174  			},
  1175  			RunInterval: m.conf.ARPPingRefreshPeriod,
  1176  		},
  1177  	)
  1178  }