github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/node_ids.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package linux
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"net"
    10  
    11  	"github.com/cilium/cilium/api/v1/models"
    12  	"github.com/cilium/cilium/pkg/bpf"
    13  	"github.com/cilium/cilium/pkg/idpool"
    14  	"github.com/cilium/cilium/pkg/logging/logfields"
    15  	"github.com/cilium/cilium/pkg/maps/nodemap"
    16  	"github.com/cilium/cilium/pkg/node"
    17  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    18  )
    19  
const (
	// minNodeID is the smallest node ID value defined here. ID 0 is handled
	// separately in this file: GetNodeIP and getNodeIDForIP use it for the
	// local node.
	// NOTE(review): presumably the lower bound of the node ID pool — confirm
	// where the pool is created.
	minNodeID = 1
	// maxNodeID is the largest value a uint16 can hold (all bits set),
	// expressed as an idpool.ID.
	maxNodeID = idpool.ID(^uint16(0))
)
    24  
    25  func (n *linuxNodeHandler) GetNodeIP(nodeID uint16) string {
    26  	n.mutex.RLock()
    27  	defer n.mutex.RUnlock()
    28  
    29  	// Check for local node ID explicitly as local node IPs are not in our maps!
    30  	if nodeID == 0 {
    31  		// Returns local node's IPv4 address if available, IPv6 address otherwise.
    32  		return node.GetCiliumEndpointNodeIP()
    33  	}
    34  	return n.nodeIPsByIDs[nodeID]
    35  }
    36  
    37  func (n *linuxNodeHandler) GetNodeID(nodeIP net.IP) (uint16, bool) {
    38  	n.mutex.RLock()
    39  	defer n.mutex.RUnlock()
    40  
    41  	return n.getNodeIDForIP(nodeIP)
    42  }
    43  
    44  func (n *linuxNodeHandler) getNodeIDForIP(nodeIP net.IP) (uint16, bool) {
    45  	localNodeV4 := node.GetIPv4()
    46  	localNodeV6 := node.GetIPv6()
    47  	if localNodeV4.Equal(nodeIP) || localNodeV6.Equal(nodeIP) {
    48  		return 0, true
    49  	}
    50  
    51  	if nodeID, exists := n.nodeIDsByIPs[nodeIP.String()]; exists {
    52  		return nodeID, true
    53  	}
    54  
    55  	return 0, false
    56  }
    57  
    58  // getNodeIDForNode gets the node ID for the given node if one was allocated
    59  // for any of the node IP addresses. If none is found, 0 is returned.
    60  func (n *linuxNodeHandler) getNodeIDForNode(node *nodeTypes.Node) uint16 {
    61  	nodeID := uint16(0)
    62  	for _, addr := range node.IPAddresses {
    63  		if id, exists := n.nodeIDsByIPs[addr.IP.String()]; exists {
    64  			nodeID = id
    65  		}
    66  	}
    67  	return nodeID
    68  }
    69  
    70  // allocateIDForNode allocates a new ID for the given node if one hasn't already
    71  // been assigned. If any of the node IPs have an ID associated, then all other
    72  // node IPs receive the same. This might happen if we allocated a node ID from
    73  // the ipcache, where we don't have all node IPs but only one.
    74  func (n *linuxNodeHandler) allocateIDForNode(oldNode *nodeTypes.Node, node *nodeTypes.Node) (uint16, error) {
    75  	var errs error
    76  
    77  	// Did we already allocate a node ID for any IP of that node?
    78  	nodeID := n.getNodeIDForNode(node)
    79  
    80  	// Perform an SPI refresh opportunistically.
    81  	// This avoids the scenario where the agent may have been down and didn't
    82  	// catch a NodeDelete event, leaving a stale IP address in the map.
    83  	var SPIChanged bool = true
    84  	if oldNode != nil {
    85  		SPIChanged = (oldNode.EncryptionKey != node.EncryptionKey)
    86  	}
    87  
    88  	if nodeID == 0 {
    89  		nodeID = uint16(n.nodeIDs.AllocateID())
    90  		if nodeID == uint16(idpool.NoID) {
    91  			n.log.Error("No more IDs available for nodes",
    92  				logfields.NodeName, node.Name,
    93  			)
    94  			// If we failed to allocate nodeID, don't map any IP to 0 nodeID.
    95  			// This causes later errors like "Found a foreign IP address with the ID of the current node"
    96  			// so we make early return here.
    97  			return nodeID, fmt.Errorf("no available node ID %q", node.Name)
    98  		} else {
    99  			n.log.Debug("Allocated new node ID for node",
   100  				logfields.NodeID, nodeID,
   101  				logfields.NodeName, node.Name,
   102  				logfields.SPI, node.EncryptionKey,
   103  			)
   104  		}
   105  	}
   106  
   107  	for _, addr := range node.IPAddresses {
   108  		ip := addr.IP.String()
   109  		if id, exists := n.nodeIDsByIPs[ip]; exists && id == nodeID {
   110  			if !SPIChanged {
   111  				continue
   112  			}
   113  		} else if exists && id != nodeID {
   114  			// The map is in an inconsistent state. This can occur when a node
   115  			// is deleted while the agent is down and its IPs are reused. To
   116  			// allocate a fresh ID, unmap the IPs of this node, and try again.
   117  			for _, addr := range node.IPAddresses {
   118  				if err := n.unmapNodeID(addr.IP.String()); err != nil {
   119  					n.log.Error("Failed to unmap stale nodeID mapping", logfields.IPAddr, ip, logfields.Error, err)
   120  				}
   121  			}
   122  
   123  			return n.allocateIDForNode(oldNode, node)
   124  		}
   125  		if err := n.mapNodeID(ip, nodeID, node.EncryptionKey); err != nil {
   126  			n.log.Error("Failed to map node IP address to allocated ID",
   127  				logfields.Error, err,
   128  				logfields.NodeID, nodeID,
   129  				logfields.IPAddr, ip,
   130  				logfields.SPI, node.EncryptionKey,
   131  			)
   132  			errs = errors.Join(errs,
   133  				fmt.Errorf("failed to map IP %q with node ID %q: %w", nodeID, nodeID, err))
   134  		}
   135  	}
   136  	return nodeID, errs
   137  }
   138  
   139  // deallocateIDForNode deallocates the node ID for the given node, if it was allocated.
   140  func (n *linuxNodeHandler) deallocateIDForNode(oldNode *nodeTypes.Node) error {
   141  	var errs error
   142  	nodeIPs := make(map[string]bool)
   143  	nodeID := n.getNodeIDForNode(oldNode)
   144  
   145  	// Check that all node IDs of the node had the same node ID.
   146  	for _, addr := range oldNode.IPAddresses {
   147  		nodeIPs[addr.IP.String()] = true
   148  		id := n.nodeIDsByIPs[addr.IP.String()]
   149  		if nodeID != id {
   150  			n.log.Error("Found two node IDs for the same node",
   151  				"first", id, "second", nodeID,
   152  				logfields.NodeName, oldNode.Name,
   153  				logfields.IPAddr, addr.IP,
   154  			)
   155  			errs = errors.Join(errs, fmt.Errorf("found two node IDs (%d and %d) for the same node", id, nodeID))
   156  		}
   157  	}
   158  
   159  	errs = errors.Join(n.deallocateNodeIDLocked(nodeID, nodeIPs, oldNode.Name))
   160  	return errs
   161  }
   162  
   163  func (n *linuxNodeHandler) deallocateNodeIDLocked(nodeID uint16, nodeIPs map[string]bool, nodeName string) error {
   164  	var errs error
   165  	for ip, id := range n.nodeIDsByIPs {
   166  		if nodeID != id {
   167  			continue
   168  		}
   169  		// Check that only IPs of this node had this node ID.
   170  		if _, isIPOfOldNode := nodeIPs[ip]; !isIPOfOldNode {
   171  			n.log.Error("Found a foreign IP address with the ID of the current node",
   172  				logfields.NodeName, nodeName,
   173  				logfields.IPAddr, ip,
   174  				logfields.NodeID, id,
   175  			)
   176  		}
   177  
   178  		if err := n.unmapNodeID(ip); err != nil {
   179  			n.log.Warn("Failed to remove a node IP to node ID mapping",
   180  				logfields.Error, err,
   181  				logfields.NodeID, nodeID,
   182  				logfields.IPAddr, ip,
   183  			)
   184  		}
   185  	}
   186  
   187  	if !n.nodeIDs.Insert(idpool.ID(nodeID)) {
   188  		n.log.Warn("Attempted to deallocate a node ID that wasn't allocated",
   189  			logfields.NodeID, nodeID,
   190  		)
   191  	}
   192  	n.log.Debug("Deallocated node ID", logfields.NodeID, nodeID)
   193  	return errs
   194  }
   195  
   196  // mapNodeID adds a node ID <> IP mapping into the local in-memory map of the
   197  // Node Manager and in the corresponding BPF map. If any of those map updates
   198  // fail, both are cancelled and the function returns an error.
   199  func (n *linuxNodeHandler) mapNodeID(ip string, id uint16, SPI uint8) error {
   200  	nodeIP := net.ParseIP(ip)
   201  	if nodeIP == nil {
   202  		return fmt.Errorf("invalid node IP %s", ip)
   203  	}
   204  
   205  	if err := n.nodeMap.Update(nodeIP, id, SPI); err != nil {
   206  		return err
   207  	}
   208  
   209  	// We only add the IP <> ID mapping in memory once we are sure it was
   210  	// successfully added to the BPF map.
   211  	n.nodeIDsByIPs[ip] = id
   212  	n.nodeIPsByIDs[id] = ip
   213  
   214  	return nil
   215  }
   216  
   217  // unmapNodeID removes a node ID <> IP mapping from the local in-memory map of
   218  // the Node Manager and from the corresponding BPF map. If any of those map
   219  // updates fail, it returns an error; in such a case, both are cancelled.
   220  func (n *linuxNodeHandler) unmapNodeID(ip string) error {
   221  	// Check error cases first, to avoid having to cancel anything.
   222  	if _, exists := n.nodeIDsByIPs[ip]; !exists {
   223  		return fmt.Errorf("cannot remove IP %s from node ID map as it doesn't exist", ip)
   224  	}
   225  	nodeIP := net.ParseIP(ip)
   226  	if nodeIP == nil {
   227  		return fmt.Errorf("invalid node IP %s", ip)
   228  	}
   229  
   230  	if err := n.nodeMap.Delete(nodeIP); err != nil {
   231  		return err
   232  	}
   233  	if id, exists := n.nodeIDsByIPs[ip]; exists {
   234  		delete(n.nodeIDsByIPs, ip)
   235  		delete(n.nodeIPsByIDs, id)
   236  	}
   237  
   238  	return nil
   239  }
   240  
   241  // diffAndUnmapNodeIPs takes two lists of node IP addresses: new and old ones.
   242  // It unmaps the node IP to node ID mapping for all the old IP addresses that
   243  // are not in the list of new IP addresses.
   244  func (n *linuxNodeHandler) diffAndUnmapNodeIPs(oldIPs, newIPs []nodeTypes.Address) {
   245  nextOldIP:
   246  	for _, oldAddr := range oldIPs {
   247  		for _, newAddr := range newIPs {
   248  			if newAddr.IP.Equal(oldAddr.IP) {
   249  				continue nextOldIP
   250  			}
   251  		}
   252  		if err := n.unmapNodeID(oldAddr.IP.String()); err != nil {
   253  			n.log.Warn("Failed to remove a node IP to node ID mapping",
   254  				logfields.Error, err,
   255  				logfields.IPAddr, oldAddr,
   256  			)
   257  		}
   258  	}
   259  }
   260  
   261  // DumpNodeIDs returns all node IDs and their associated IP addresses.
   262  func (n *linuxNodeHandler) DumpNodeIDs() []*models.NodeID {
   263  	n.mutex.Lock()
   264  	defer n.mutex.Unlock()
   265  
   266  	nodeIDs := map[uint16]*models.NodeID{}
   267  	for ip, id := range n.nodeIDsByIPs {
   268  		if nodeID, exists := nodeIDs[id]; exists {
   269  			nodeID.Ips = append(nodeID.Ips, ip)
   270  			nodeIDs[id] = nodeID
   271  		} else {
   272  			i := int64(id)
   273  			nodeIDs[id] = &models.NodeID{
   274  				ID:  &i,
   275  				Ips: []string{ip},
   276  			}
   277  		}
   278  	}
   279  
   280  	dump := make([]*models.NodeID, 0, len(nodeIDs))
   281  	for _, nodeID := range nodeIDs {
   282  		dump = append(dump, nodeID)
   283  	}
   284  	return dump
   285  }
   286  
   287  // RestoreNodeIDs restores node IDs and their associated IP addresses from the
   288  // BPF map and into the node handler in-memory copy.
   289  func (n *linuxNodeHandler) RestoreNodeIDs() {
   290  	// Retrieve node IDs from the BPF map to be able to restore them.
   291  	nodeValues := make(map[string]*nodemap.NodeValueV2)
   292  	incorrectNodeIDs := make(map[string]struct{})
   293  	parse := func(key *nodemap.NodeKey, val *nodemap.NodeValueV2) {
   294  		address := key.IP.String()
   295  		if key.Family == bpf.EndpointKeyIPv4 {
   296  			address = net.IP(key.IP[:net.IPv4len]).String()
   297  		}
   298  		if val.NodeID == 0 {
   299  			incorrectNodeIDs[address] = struct{}{}
   300  		}
   301  		nodeValues[address] = &nodemap.NodeValueV2{
   302  			NodeID: val.NodeID,
   303  			SPI:    val.SPI,
   304  		}
   305  	}
   306  
   307  	if err := n.nodeMap.IterateWithCallback(parse); err != nil {
   308  		n.log.Error("Failed to dump content of node map",
   309  			logfields.Error, err)
   310  		return
   311  	}
   312  
   313  	n.registerNodeIDAllocations(nodeValues)
   314  	if len(incorrectNodeIDs) > 0 {
   315  		n.log.Warn("Removing incorrect node IP to node ID mappings from the BPF map",
   316  			logfields.Count, len(incorrectNodeIDs))
   317  	}
   318  	for ip := range incorrectNodeIDs {
   319  		if err := n.unmapNodeID(ip); err != nil {
   320  			n.log.Warn("Failed to remove a incorrect node IP to node ID mapping",
   321  				logfields.Error, err,
   322  				logfields.IPAddr, ip,
   323  			)
   324  		}
   325  	}
   326  	n.log.Info("Restored node IDs from the BPF map",
   327  		logfields.Count, len(nodeValues))
   328  }
   329  
   330  func (n *linuxNodeHandler) registerNodeIDAllocations(allocatedNodeIDs map[string]*nodemap.NodeValueV2) {
   331  	n.mutex.Lock()
   332  	defer n.mutex.Unlock()
   333  
   334  	if len(n.nodeIDsByIPs) > 0 {
   335  		// If this happens, we likely have a bug in the startup logic and
   336  		// restored node IDs too late (after new node IDs were allocated).
   337  		n.log.Error("The node manager already contains node IDs")
   338  	}
   339  
   340  	// The node manager holds both a map of nodeIP=>nodeID and a pool of ID for
   341  	// the allocation of node IDs. Not only do we need to update the map,
   342  	nodeIDs := make(map[uint16]struct{})
   343  	IDsByIPs := make(map[string]uint16)
   344  	IPsByIDs := make(map[uint16]string)
   345  	for ip, val := range allocatedNodeIDs {
   346  		id := val.NodeID
   347  		IDsByIPs[ip] = id
   348  		IPsByIDs[id] = ip
   349  		// ...but we also need to remove any restored nodeID from the pool of IDs
   350  		// available for allocation.
   351  		if _, exists := nodeIDs[id]; !exists {
   352  			nodeIDs[id] = struct{}{}
   353  			if !n.nodeIDs.Remove(idpool.ID(id)) {
   354  				// This is just a sanity check. It should never happen as we
   355  				// have checked that we start with a full idpool (0 allocated
   356  				// node IDs) and then only remove them from the idpool if they
   357  				// were already removed.
   358  				n.log.Error("Node ID was already allocated",
   359  					logfields.NodeID, id,
   360  				)
   361  			}
   362  		}
   363  	}
   364  
   365  	n.nodeIDsByIPs = IDsByIPs
   366  	n.nodeIPsByIDs = IPsByIDs
   367  }