github.com/cilium/cilium@v1.16.2/pkg/ipam/crd.go

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package ipam
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"net"
    11  	"reflect"
    12  	"strconv"
    13  	"sync"
    14  
    15  	"github.com/sirupsen/logrus"
    16  	"github.com/vishvananda/netlink"
    17  	"golang.org/x/sys/unix"
    18  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    19  	"k8s.io/apimachinery/pkg/fields"
    20  	"k8s.io/apimachinery/pkg/util/wait"
    21  	"k8s.io/client-go/tools/cache"
    22  
    23  	alibabaCloud "github.com/cilium/cilium/pkg/alibabacloud/utils"
    24  	"github.com/cilium/cilium/pkg/cidr"
    25  	"github.com/cilium/cilium/pkg/ip"
    26  	ipamOption "github.com/cilium/cilium/pkg/ipam/option"
    27  	ipamTypes "github.com/cilium/cilium/pkg/ipam/types"
    28  	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    29  	"github.com/cilium/cilium/pkg/k8s/client"
    30  	"github.com/cilium/cilium/pkg/k8s/informer"
    31  	"github.com/cilium/cilium/pkg/k8s/utils"
    32  	"github.com/cilium/cilium/pkg/lock"
    33  	"github.com/cilium/cilium/pkg/logging/logfields"
    34  	"github.com/cilium/cilium/pkg/node"
    35  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    36  	"github.com/cilium/cilium/pkg/option"
    37  	"github.com/cilium/cilium/pkg/time"
    38  	"github.com/cilium/cilium/pkg/trigger"
    39  )
    40  
    41  var (
    42  	sharedNodeStore *nodeStore
    43  	initNodeStore   sync.Once
    44  )
    45  
    46  const (
    47  	fieldName = "name"
    48  )
    49  
    50  // nodeStore represents a CiliumNode custom resource and binds the CR to a list
    51  // of allocators
    52  type nodeStore struct {
    53  	// mutex protects access to all members of this struct
    54  	mutex lock.RWMutex
    55  
    57  	// ownNode is the last known version of the node's own CiliumNode resource
    57  	ownNode *ciliumv2.CiliumNode
    58  
    59  	// allocators is a list of allocators tied to this custom resource
    60  	allocators []*crdAllocator
    61  
    62  	// refreshTrigger is the configured trigger to synchronize updates to
    63  	// the custom resource with rate limiting
    64  	refreshTrigger *trigger.Trigger
    65  
    66  	// allocationPoolSize is the size of the IP pool for each address
    67  	// family
    68  	allocationPoolSize map[Family]int
    69  
    70  	// signal for completion of restoration
    71  	restoreFinished  chan struct{}
    72  	restoreCloseOnce sync.Once
    73  
    74  	clientset client.Clientset
    75  
    76  	conf      *option.DaemonConfig
    77  	mtuConfig MtuConfiguration
    78  }
    79  
    80  // newNodeStore initializes a new store which reflects the CiliumNode custom
    81  // resource of the specified node name
    82  func newNodeStore(nodeName string, conf *option.DaemonConfig, owner Owner, localNodeStore *node.LocalNodeStore, clientset client.Clientset, k8sEventReg K8sEventRegister, mtuConfig MtuConfiguration) *nodeStore {
    83  	log.WithField(fieldName, nodeName).Info("Subscribed to CiliumNode custom resource")
    84  
    85  	store := &nodeStore{
    86  		allocators:         []*crdAllocator{},
    87  		allocationPoolSize: map[Family]int{},
    88  		conf:               conf,
    89  		mtuConfig:          mtuConfig,
    90  		clientset:          clientset,
    91  	}
    92  	store.restoreFinished = make(chan struct{})
    93  
    94  	t, err := trigger.NewTrigger(trigger.Parameters{
    95  		Name:        "crd-allocator-node-refresher",
    96  		MinInterval: conf.IPAMCiliumNodeUpdateRate,
    97  		TriggerFunc: store.refreshNodeTrigger,
    98  	})
    99  	if err != nil {
   100  		log.WithError(err).Fatal("Unable to initialize CiliumNode synchronization trigger")
   101  	}
   102  	store.refreshTrigger = t
   103  
   104  	// Create the CiliumNode custom resource. This call will block until
   105  	// the custom resource has been created
   106  	owner.UpdateCiliumNodeResource()
   107  	apiGroup := "cilium/v2::CiliumNode"
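        	// Restrict the informer to the CiliumNode object whose name matches the
        	// local node, so this store only ever receives events for its own resource.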
   108  	ciliumNodeSelector := fields.ParseSelectorOrDie("metadata.name=" + nodeName)
   109  	_, ciliumNodeInformer := informer.NewInformer(
   110  		utils.ListerWatcherWithFields(
   111  			utils.ListerWatcherFromTyped[*ciliumv2.CiliumNodeList](clientset.CiliumV2().CiliumNodes()),
   112  			ciliumNodeSelector),
   113  		&ciliumv2.CiliumNode{},
   114  		0,
   115  		cache.ResourceEventHandlerFuncs{
   116  			AddFunc: func(obj interface{}) {
   117  				var valid, equal bool
   118  				defer func() { k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "create", valid, equal) }()
   119  				if node, ok := obj.(*ciliumv2.CiliumNode); ok {
   120  					valid = true
   121  					store.updateLocalNodeResource(node.DeepCopy())
   122  					k8sEventReg.K8sEventProcessed("CiliumNode", "create", true)
   123  				} else {
   124  					log.Warningf("Unknown CiliumNode object type %s received: %+v", reflect.TypeOf(obj), obj)
   125  				}
   126  			},
   127  			UpdateFunc: func(oldObj, newObj interface{}) {
   128  				var valid, equal bool
   129  				defer func() { k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "update", valid, equal) }()
   130  				if oldNode, ok := oldObj.(*ciliumv2.CiliumNode); ok {
   131  					if newNode, ok := newObj.(*ciliumv2.CiliumNode); ok {
   132  						valid = true
   133  						newNode = newNode.DeepCopy()
   134  						if oldNode.DeepEqual(newNode) {
   135  							// The UpdateStatus call in refreshNode requires an up-to-date
   136  							// CiliumNode.ObjectMeta.ResourceVersion. Therefore, we store the most
   137  							// recent version here even if the nodes are equal, because
   138  							// CiliumNode.DeepEqual will consider two nodes to be equal even if
   139  							// their resource version differs.
   140  							store.setOwnNodeWithoutPoolUpdate(newNode)
   141  							equal = true
   142  							return
   143  						}
   144  						store.updateLocalNodeResource(newNode)
   145  						k8sEventReg.K8sEventProcessed("CiliumNode", "update", true)
   146  					} else {
   147  						log.Warningf("Unknown CiliumNode object type %T received: %+v", newObj, newObj)
   148  					}
   149  				} else {
   150  					log.Warningf("Unknown CiliumNode object type %T received: %+v", oldObj, oldObj)
   151  				}
   152  			},
   153  			DeleteFunc: func(obj interface{}) {
   154  				// Given we are watching a single specific
   155  				// resource using the node name, any delete
   156  				// notification means that the resource
   157  				// matching the local node name has been
   158  				// removed. No attempt to cast is required.
   159  				store.deleteLocalNodeResource()
   160  				k8sEventReg.K8sEventProcessed("CiliumNode", "delete", true)
   161  				k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "delete", true, false)
   162  			},
   163  		},
   164  		nil,
   165  	)
   166  
   167  	go ciliumNodeInformer.Run(wait.NeverStop)
   168  
   169  	log.WithField(fieldName, nodeName).Info("Waiting for CiliumNode custom resource to become available...")
   170  	if ok := cache.WaitForCacheSync(wait.NeverStop, ciliumNodeInformer.HasSynced); !ok {
   171  		log.WithField(fieldName, nodeName).Fatal("Unable to synchronize CiliumNode custom resource")
   172  	} else {
   173  		log.WithField(fieldName, nodeName).Info("Successfully synchronized CiliumNode custom resource")
   174  	}
   175  
   176  	for {
   177  		minimumReached, required, numAvailable := store.hasMinimumIPsInPool(localNodeStore)
   178  		logFields := logrus.Fields{
   179  			fieldName:   nodeName,
   180  			"required":  required,
   181  			"available": numAvailable,
   182  		}
   183  		if minimumReached {
   184  			log.WithFields(logFields).Info("All required IPs are available in CRD-backed allocation pool")
   185  			break
   186  		}
   187  
   188  		log.WithFields(logFields).WithField(
   189  			logfields.HelpMessage,
   190  			"Check if cilium-operator pod is running and does not have any warnings or error messages.",
   191  		).Info("Waiting for IPs to become available in CRD-backed allocation pool")
   192  		time.Sleep(5 * time.Second)
   193  	}
   194  
   195  	go func() {
   196  		// Initial upstream sync must wait for the allocated IPs
   197  		// to be restored
   198  		<-store.restoreFinished
   199  		store.refreshTrigger.TriggerWithReason("initial sync")
   200  	}()
   201  
   202  	return store
   203  }
   204  
   205  func deriveVpcCIDRs(node *ciliumv2.CiliumNode) (primaryCIDR *cidr.CIDR, secondaryCIDRs []*cidr.CIDR) {
   206  	// A node belongs to a single VPC so we can pick the first ENI
   207  	// in the list and derive the VPC CIDR from it.
   208  	for _, eni := range node.Status.ENI.ENIs {
   209  		c, err := cidr.ParseCIDR(eni.VPC.PrimaryCIDR)
   210  		if err == nil {
   211  			primaryCIDR = c
   212  			for _, sc := range eni.VPC.CIDRs {
   213  				c, err = cidr.ParseCIDR(sc)
   214  				if err == nil {
   215  					secondaryCIDRs = append(secondaryCIDRs, c)
   216  				}
   217  			}
   218  			return
   219  		}
   220  	}
   221  	for _, azif := range node.Status.Azure.Interfaces {
   222  		c, err := cidr.ParseCIDR(azif.CIDR)
   223  		if err == nil {
   224  			primaryCIDR = c
   225  			return
   226  		}
   227  	}
   228  	// Return the AlibabaCloud VPC CIDRs
   229  	if len(node.Status.AlibabaCloud.ENIs) > 0 {
   230  		c, err := cidr.ParseCIDR(node.Spec.AlibabaCloud.CIDRBlock)
   231  		if err == nil {
   232  			primaryCIDR = c
   233  		}
   234  		for _, eni := range node.Status.AlibabaCloud.ENIs {
   235  			for _, sc := range eni.VPC.SecondaryCIDRs {
   236  				c, err = cidr.ParseCIDR(sc)
   237  				if err == nil {
   238  					secondaryCIDRs = append(secondaryCIDRs, c)
   239  				}
   240  			}
   241  			return
   242  		}
   243  	}
   244  	return
   245  }
   246  
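        // autoDetectIPv4NativeRoutingCIDR derives the VPC CIDRs from the local
        // CiliumNode resource. If an IPv4 native routing CIDR was configured, it is
        // validated against the VPC CIDRs; otherwise the primary VPC CIDR is adopted
        // as the native routing CIDR of the local node. It returns false if no VPC
        // CIDR could be determined.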
   247  func (n *nodeStore) autoDetectIPv4NativeRoutingCIDR(localNodeStore *node.LocalNodeStore) bool {
   248  	if primaryCIDR, secondaryCIDRs := deriveVpcCIDRs(n.ownNode); primaryCIDR != nil {
   249  		allCIDRs := append([]*cidr.CIDR{primaryCIDR}, secondaryCIDRs...)
   250  		if nativeCIDR := n.conf.GetIPv4NativeRoutingCIDR(); nativeCIDR != nil {
   251  			found := false
   252  			for _, vpcCIDR := range allCIDRs {
   253  				logFields := logrus.Fields{
   254  					"vpc-cidr":                   vpcCIDR.String(),
   255  					option.IPv4NativeRoutingCIDR: nativeCIDR.String(),
   256  				}
   257  
   258  				ranges4, _ := ip.CoalesceCIDRs([]*net.IPNet{nativeCIDR.IPNet, vpcCIDR.IPNet})
   259  				if len(ranges4) != 1 {
   260  					log.WithFields(logFields).Info("Native routing CIDR does not contain VPC CIDR, trying next")
   261  				} else {
   262  					found = true
   263  					log.WithFields(logFields).Info("Native routing CIDR contains VPC CIDR, ignoring autodetected VPC CIDRs.")
   264  					break
   265  				}
   266  			}
   267  			if !found {
   268  				log.Fatal("None of the VPC CIDRs contains the specified native routing CIDR")
   269  			}
   270  		} else {
   271  			log.WithFields(logrus.Fields{
   272  				"vpc-cidr": primaryCIDR.String(),
   273  			}).Info("Using autodetected primary VPC CIDR.")
   274  			localNodeStore.Update(func(n *node.LocalNode) {
   275  				n.IPv4NativeRoutingCIDR = primaryCIDR
   276  			})
   277  		}
   278  		return true
   279  	} else {
   280  		log.Info("Could not determine VPC CIDRs")
   281  		return false
   282  	}
   283  }
   284  
   285  // hasMinimumIPsInPool returns true if the required number of IPs is available
   286  // in the allocation pool. It also returns the number of IPs required and
   287  // available.
   288  func (n *nodeStore) hasMinimumIPsInPool(localNodeStore *node.LocalNodeStore) (minimumReached bool, required, numAvailable int) {
   289  	n.mutex.RLock()
   290  	defer n.mutex.RUnlock()
   291  
   292  	if n.ownNode == nil {
   293  		return
   294  	}
   295  
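        	// Determine how many IPs must be available: an explicit MinAllocate takes
        	// precedence, followed by PreAllocate; otherwise two IPs are required when
        	// health checking is enabled, one without.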
   296  	switch {
   297  	case n.ownNode.Spec.IPAM.MinAllocate != 0:
   298  		required = n.ownNode.Spec.IPAM.MinAllocate
   299  	case n.ownNode.Spec.IPAM.PreAllocate != 0:
   300  		required = n.ownNode.Spec.IPAM.PreAllocate
   301  	case n.conf.HealthCheckingEnabled():
   302  		required = 2
   303  	default:
   304  		required = 1
   305  	}
   306  
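        	// Count the pool IPs that are not part of an ongoing release handshake;
        	// note that the minimum check below compares against the full pool size.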
   307  	if n.ownNode.Spec.IPAM.Pool != nil {
   308  		for ip := range n.ownNode.Spec.IPAM.Pool {
   309  			if !n.isIPInReleaseHandshake(ip) {
   310  				numAvailable++
   311  			}
   312  		}
   313  		if len(n.ownNode.Spec.IPAM.Pool) >= required {
   314  			minimumReached = true
   315  		}
   316  
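        		// In cloud IPAM modes (ENI, Azure, AlibabaCloud) the pool is not
        		// considered ready until the IPv4 native routing CIDR has been
        		// validated or auto-detected from the VPC CIDRs.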
   317  		if n.conf.IPAMMode() == ipamOption.IPAMENI || n.conf.IPAMMode() == ipamOption.IPAMAzure || n.conf.IPAMMode() == ipamOption.IPAMAlibabaCloud {
   318  			if !n.autoDetectIPv4NativeRoutingCIDR(localNodeStore) {
   319  				minimumReached = false
   320  			}
   321  		}
   322  	}
   323  
   324  	return
   325  }
   326  
   327  // deleteLocalNodeResource is called when the CiliumNode resource representing
   328  // the local node has been deleted.
   329  func (n *nodeStore) deleteLocalNodeResource() {
   330  	n.mutex.Lock()
   331  	n.ownNode = nil
   332  	n.mutex.Unlock()
   333  }
   334  
   335  // updateLocalNodeResource is called when the CiliumNode resource representing
   336  // the local node has been added or updated. It updates the available IPs based
   337  // on the custom resource passed into the function.
   338  func (n *nodeStore) updateLocalNodeResource(node *ciliumv2.CiliumNode) {
   339  	n.mutex.Lock()
   340  	defer n.mutex.Unlock()
   341  
   342  	if n.conf.IPAMMode() == ipamOption.IPAMENI {
   343  		if err := configureENIDevices(n.ownNode, node, n.mtuConfig); err != nil {
   344  			log.WithError(err).Errorf("Failed to update routes and rules for ENIs")
   345  		}
   346  	}
   347  
   348  	n.ownNode = node
   349  	n.allocationPoolSize[IPv4] = 0
   350  	n.allocationPoolSize[IPv6] = 0
   351  	for ipString := range node.Spec.IPAM.Pool {
   352  		if ip := net.ParseIP(ipString); ip != nil {
   353  			if ip.To4() != nil {
   354  				n.allocationPoolSize[IPv4]++
   355  			} else {
   356  				n.allocationPoolSize[IPv6]++
   357  			}
   358  		}
   359  	}
   360  
   361  	releaseUpstreamSyncNeeded := false
   362  	// ACK or NACK IPs marked for release by the operator
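        	// The operator marks an IP with marked-for-release; the agent answers with
        	// ready-for-release when the IP is no longer allocated, or do-not-release
        	// when it is still in use or not owned by this node. Once the operator has
        	// set the IP to released and it has been removed from .spec.ipam.pool, the
        	// entry is dropped here.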
   363  	for ip, status := range n.ownNode.Status.IPAM.ReleaseIPs {
   364  		if n.ownNode.Spec.IPAM.Pool == nil {
   365  			continue
   366  		}
   367  		// Ignore states that the agent has already responded to.
   368  		if status == ipamOption.IPAMReadyForRelease || status == ipamOption.IPAMDoNotRelease {
   369  			continue
   370  		}
   371  		if _, ok := n.ownNode.Spec.IPAM.Pool[ip]; !ok {
   372  			if status == ipamOption.IPAMReleased {
   373  				// Remove entry from release-ips only when it is removed from .spec.ipam.pool as well
   374  				delete(n.ownNode.Status.IPAM.ReleaseIPs, ip)
   375  				releaseUpstreamSyncNeeded = true
   376  
   377  				// Remove the unreachable route for this IP
   378  				if n.conf.UnreachableRoutesEnabled() {
   379  					parsedIP := net.ParseIP(ip)
   380  					if parsedIP == nil {
   381  						// Unable to parse IP, no point in trying to remove the route
   382  						log.Warningf("Unable to parse IP %s", ip)
   383  						continue
   384  					}
   385  
   386  					err := netlink.RouteDel(&netlink.Route{
   387  						Dst:   &net.IPNet{IP: parsedIP, Mask: net.CIDRMask(32, 32)},
   388  						Table: unix.RT_TABLE_MAIN,
   389  						Type:  unix.RTN_UNREACHABLE,
   390  					})
   391  					if err != nil && !errors.Is(err, unix.ESRCH) {
   392  						// We ignore ESRCH, as it means the entry was already deleted
   393  						log.WithError(err).Warningf("Unable to delete unreachable route for IP %s", ip)
   394  						continue
   395  					}
   396  				}
   397  			} else if status == ipamOption.IPAMMarkForRelease {
   398  				// NACK the IP if this node doesn't own it
   399  				n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMDoNotRelease
   400  				releaseUpstreamSyncNeeded = true
   401  			}
   402  			continue
   403  		}
   404  
   405  		// Ignore all other states; transitions to do-not-release and ready-for-release
   406  		// are allowed only from marked-for-release.
   407  		if status != ipamOption.IPAMMarkForRelease {
   408  			continue
   409  		}
   410  		// Retrieve the appropriate allocator
   411  		var allocator *crdAllocator
   412  		var ipFamily Family
   413  		if ipAddr := net.ParseIP(ip); ipAddr != nil {
   414  			ipFamily = DeriveFamily(ipAddr)
   415  		}
   416  		if ipFamily == "" {
   417  			continue
   418  		}
   419  		for _, a := range n.allocators {
   420  			if a.family == ipFamily {
   421  				allocator = a
   422  			}
   423  		}
   424  		if allocator == nil {
   425  			continue
   426  		}
   427  
   428  		// Some functions like crdAllocator.Allocate() acquire the allocator lock first and then the nodeStore lock.
   429  		// Release the nodeStore lock before acquiring the allocator lock to avoid potential deadlocks from
   430  		// inconsistent lock ordering.
   431  		n.mutex.Unlock()
   432  		allocator.mutex.Lock()
   433  		_, ok := allocator.allocated[ip]
   434  		allocator.mutex.Unlock()
   435  		n.mutex.Lock()
   436  
   437  		if ok {
   438  			// IP still in use, update the operator to stop releasing the IP.
   439  			n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMDoNotRelease
   440  		} else {
   441  			n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMReadyForRelease
   442  		}
   443  		releaseUpstreamSyncNeeded = true
   444  	}
   445  
   446  	if releaseUpstreamSyncNeeded {
   447  		n.refreshTrigger.TriggerWithReason("excess IP release")
   448  	}
   449  }
   450  
   451  // setOwnNodeWithoutPoolUpdate overwrites the local node copy (e.g. to update
   452  // its resourceVersion) without updating the available IP pool.
   453  func (n *nodeStore) setOwnNodeWithoutPoolUpdate(node *ciliumv2.CiliumNode) {
   454  	n.mutex.Lock()
   455  	n.ownNode = node
   456  	n.mutex.Unlock()
   457  }
   458  
   459  // refreshNodeTrigger is called to refresh the custom resource after taking the
   460  // configured rate limiting into account
   461  //
   462  // Note: The function signature includes the reasons argument in order to
   463  // implement the trigger.TriggerFunc interface despite the argument being
   464  // unused.
   465  func (n *nodeStore) refreshNodeTrigger(reasons []string) {
   466  	if err := n.refreshNode(); err != nil {
   467  		log.WithError(err).Warning("Unable to update CiliumNode custom resource")
   468  		n.refreshTrigger.TriggerWithReason("retry after error")
   469  	}
   470  }
   471  
   472  // refreshNode updates the custom resource in the apiserver based on the latest
   473  // information in the local node store
   474  func (n *nodeStore) refreshNode() error {
   475  	n.mutex.RLock()
   476  	if n.ownNode == nil {
   477  		n.mutex.RUnlock()
   478  		return nil
   479  	}
   480  
   481  	node := n.ownNode.DeepCopy()
   482  	staleCopyOfAllocators := make([]*crdAllocator, len(n.allocators))
   483  	copy(staleCopyOfAllocators, n.allocators)
   484  	n.mutex.RUnlock()
   485  
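        	// Rebuild the set of IPs currently in use from all allocators and publish
        	// it through the CiliumNode status subresource.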
   486  	node.Status.IPAM.Used = ipamTypes.AllocationMap{}
   487  
   488  	for _, a := range staleCopyOfAllocators {
   489  		a.mutex.RLock()
   490  		for ip, ipInfo := range a.allocated {
   491  			node.Status.IPAM.Used[ip] = ipInfo
   492  		}
   493  		a.mutex.RUnlock()
   494  	}
   495  
   496  	var err error
   497  	_, err = n.clientset.CiliumV2().CiliumNodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{})
   498  
   499  	return err
   500  }
   501  
   502  // addAllocator adds a new CRD allocator to the node store
   503  func (n *nodeStore) addAllocator(allocator *crdAllocator) {
   504  	n.mutex.Lock()
   505  	n.allocators = append(n.allocators, allocator)
   506  	n.mutex.Unlock()
   507  }
   508  
   509  // allocate checks if a particular IP can be allocated or returns an error
   510  func (n *nodeStore) allocate(ip net.IP) (*ipamTypes.AllocationIP, error) {
   511  	n.mutex.RLock()
   512  	defer n.mutex.RUnlock()
   513  
   514  	if n.ownNode == nil {
   515  		return nil, fmt.Errorf("CiliumNode for own node is not available")
   516  	}
   517  
   518  	if n.ownNode.Spec.IPAM.Pool == nil {
   519  		return nil, fmt.Errorf("No IPs available")
   520  	}
   521  
   522  	if n.isIPInReleaseHandshake(ip.String()) {
   523  		return nil, fmt.Errorf("IP not available, marked or ready for release")
   524  	}
   525  
   526  	ipInfo, ok := n.ownNode.Spec.IPAM.Pool[ip.String()]
   527  	if !ok {
   528  		return nil, NewIPNotAvailableInPoolError(ip)
   529  	}
   530  
   531  	return &ipInfo, nil
   532  }
   533  
   534  // isIPInReleaseHandshake validates if a given IP is currently in the process of being released
   535  func (n *nodeStore) isIPInReleaseHandshake(ip string) bool {
   536  	if n.ownNode.Status.IPAM.ReleaseIPs == nil {
   537  		return false
   538  	}
   539  	if status, ok := n.ownNode.Status.IPAM.ReleaseIPs[ip]; ok {
   540  		if status == ipamOption.IPAMMarkForRelease || status == ipamOption.IPAMReadyForRelease || status == ipamOption.IPAMReleased {
   541  			return true
   542  		}
   543  	}
   544  	return false
   545  }
   546  
   547  // allocateNext allocates the next available IP or returns an error
   548  func (n *nodeStore) allocateNext(allocated ipamTypes.AllocationMap, family Family, owner string) (net.IP, *ipamTypes.AllocationIP, error) {
   549  	n.mutex.RLock()
   550  	defer n.mutex.RUnlock()
   551  
   552  	if n.ownNode == nil {
   553  		return nil, nil, fmt.Errorf("CiliumNode for own node is not available")
   554  	}
   555  
   556  	// Check if IP has a custom owner (only supported in manual CRD mode)
   557  	if n.conf.IPAMMode() == ipamOption.IPAMCRD && len(owner) != 0 {
   558  		for ip, ipInfo := range n.ownNode.Spec.IPAM.Pool {
   559  			if ipInfo.Owner == owner {
   560  				parsedIP := net.ParseIP(ip)
   561  				if parsedIP == nil {
   562  					log.WithFields(logrus.Fields{
   563  						fieldName: n.ownNode.Name,
   564  						"ip":      ip,
   565  					}).Warning("Unable to parse IP in CiliumNode custom resource")
   566  					return nil, fmt.Errorf("invalid custom IP %s for %s", ip, owner)
   567  				}
   568  				if DeriveFamily(parsedIP) != family {
   569  					continue
   570  				}
   571  				return parsedIP, &ipInfo, nil
   572  			}
   573  		}
   574  	}
   575  
   576  	// FIXME: This is currently using a brute-force method that can be
   577  	// optimized
   578  	for ip, ipInfo := range n.ownNode.Spec.IPAM.Pool {
   579  		if _, ok := allocated[ip]; !ok {
   580  
   581  			if n.isIPInReleaseHandshake(ip) {
   582  				continue // IP not available
   583  			}
   584  			if ipInfo.Owner != "" {
   585  				continue // IP is used by another owner
   586  			}
   587  			parsedIP := net.ParseIP(ip)
   588  			if parsedIP == nil {
   589  				log.WithFields(logrus.Fields{
   590  					fieldName: n.ownNode.Name,
   591  					"ip":      ip,
   592  				}).Warning("Unable to parse IP in CiliumNode custom resource")
   593  				continue
   594  			}
   595  
   596  			if DeriveFamily(parsedIP) != family {
   597  				continue
   598  			}
   599  
   600  			return parsedIP, &ipInfo, nil
   601  		}
   602  	}
   603  
   604  	return nil, nil, fmt.Errorf("No more IPs available")
   605  }
   606  
   607  // totalPoolSize returns the total size of the allocation pool
   608  func (n *nodeStore) totalPoolSize(family Family) int {
   609  	n.mutex.RLock()
   610  	defer n.mutex.RUnlock()
   611  
   612  	if num, ok := n.allocationPoolSize[family]; ok {
   613  		return num
   614  	}
   615  	return 0
   616  }
   617  
   618  // crdAllocator implements the CRD-backed IP allocator
   619  type crdAllocator struct {
   620  	// store is the node store backing the custom resource
   621  	store *nodeStore
   622  
   623  	// mutex protects access to the allocated map
   624  	mutex lock.RWMutex
   625  
   626  	// allocated is a map of all allocated IPs indexed by the allocated IP
   627  	// represented as string
   628  	allocated ipamTypes.AllocationMap
   629  
   630  	// family is the address family this allocator is responsible for
   631  	family Family
   632  
   633  	conf *option.DaemonConfig
   634  }
   635  
   636  // newCRDAllocator creates a new CRD-backed IP allocator
   637  func newCRDAllocator(family Family, c *option.DaemonConfig, owner Owner, localNodeStore *node.LocalNodeStore, clientset client.Clientset, k8sEventReg K8sEventRegister, mtuConfig MtuConfiguration) Allocator {
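        	// Every CRD allocator shares the single nodeStore reflecting the local
        	// CiliumNode resource; the sync.Once ensures the store is created only on
        	// the first call.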
   638  	initNodeStore.Do(func() {
   639  		sharedNodeStore = newNodeStore(nodeTypes.GetName(), c, owner, localNodeStore, clientset, k8sEventReg, mtuConfig)
   640  	})
   641  
   642  	allocator := &crdAllocator{
   643  		allocated: ipamTypes.AllocationMap{},
   644  		family:    family,
   645  		store:     sharedNodeStore,
   646  		conf:      c,
   647  	}
   648  
   649  	sharedNodeStore.addAllocator(allocator)
   650  
   651  	return allocator
   652  }
   653  
   654  // deriveGatewayIP accepts a CIDR and the index of an IP within that CIDR, and
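        // returns that IP as the gateway address. For example, with subnet CIDR
        // "10.0.8.0/22" and index 1 this yields "10.0.8.1", the x.x.x.1 gateway
        // convention used for ENI subnets below.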
   655  func deriveGatewayIP(cidr string, index int) string {
   656  	_, ipNet, err := net.ParseCIDR(cidr)
   657  	if err != nil {
   658  		log.WithError(err).Warningf("Unable to parse subnet CIDR %s", cidr)
   659  		return ""
   660  	}
   661  	gw := ip.GetIPAtIndex(*ipNet, int64(index))
   662  	if gw == nil {
   663  		return ""
   664  	}
   665  	return gw.String()
   666  }
   667  
   668  func (a *crdAllocator) buildAllocationResult(ip net.IP, ipInfo *ipamTypes.AllocationIP) (result *AllocationResult, err error) {
   669  	result = &AllocationResult{IP: ip}
   670  
   671  	a.store.mutex.RLock()
   672  	defer a.store.mutex.RUnlock()
   673  
   674  	if a.store.ownNode == nil {
   675  		return
   676  	}
   677  
   678  	switch a.conf.IPAMMode() {
   679  
   680  	// In ENI mode, the Resource points to the ENI so we can derive the
   681  	// master interface and all CIDRs of the VPC
   682  	case ipamOption.IPAMENI:
   683  		for _, eni := range a.store.ownNode.Status.ENI.ENIs {
   684  			if eni.ID == ipInfo.Resource {
   685  				result.PrimaryMAC = eni.MAC
   686  				result.CIDRs = []string{eni.VPC.PrimaryCIDR}
   687  				result.CIDRs = append(result.CIDRs, eni.VPC.CIDRs...)
   688  				// Add manually configured Native Routing CIDR
   689  				if a.conf.GetIPv4NativeRoutingCIDR() != nil {
   690  					result.CIDRs = append(result.CIDRs, a.conf.GetIPv4NativeRoutingCIDR().String())
   691  				}
   692  				if eni.Subnet.CIDR != "" {
   693  					// The gateway for a subnet and VPC is always x.x.x.1
   694  					// Ref: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Route_Tables.html
   695  					result.GatewayIP = deriveGatewayIP(eni.Subnet.CIDR, 1)
   696  				}
   697  				result.InterfaceNumber = strconv.Itoa(eni.Number)
   698  
   699  				return
   700  			}
   701  		}
   702  		return nil, fmt.Errorf("unable to find ENI %s", ipInfo.Resource)
   703  
   704  	// In Azure mode, the Resource points to the azure interface so we can
   705  	// derive the master interface
   706  	case ipamOption.IPAMAzure:
   707  		for _, iface := range a.store.ownNode.Status.Azure.Interfaces {
   708  			if iface.ID == ipInfo.Resource {
   709  				result.PrimaryMAC = iface.MAC
   710  				result.GatewayIP = iface.Gateway
   711  				result.CIDRs = append(result.CIDRs, iface.CIDR)
   712  				// For now, we can hardcode the interface number to a valid
   713  				// integer because it will not be used in the allocation result
   714  				// anyway. To elaborate, Azure IPAM mode automatically sets
   715  				// option.Config.EgressMultiHomeIPRuleCompat to true, meaning
   716  				// that the CNI will not use the interface number when creating
   717  				// the pod rules and routes. We are hardcoding simply to bypass
   718  				// the parsing errors when InterfaceNumber is empty. See
   719  				// https://github.com/cilium/cilium/issues/15496.
   720  				//
   721  				// TODO: Once https://github.com/cilium/cilium/issues/14705 is
   722  				// resolved, then we don't need to hardcode this anymore.
   723  				result.InterfaceNumber = "0"
   724  				return
   725  			}
   726  		}
   727  		return nil, fmt.Errorf("unable to find interface %s", ipInfo.Resource)
   728  
   729  	// In AlibabaCloud mode, the Resource points to the ENI so we can derive the
   730  	// master interface and all CIDRs of the VPC
   731  	case ipamOption.IPAMAlibabaCloud:
   732  		for _, eni := range a.store.ownNode.Status.AlibabaCloud.ENIs {
   733  			if eni.NetworkInterfaceID != ipInfo.Resource {
   734  				continue
   735  			}
   736  			result.PrimaryMAC = eni.MACAddress
   737  			result.CIDRs = []string{eni.VSwitch.CIDRBlock}
   738  
   739  			// Ref: https://www.alibabacloud.com/help/doc-detail/65398.html
   740  			result.GatewayIP = deriveGatewayIP(eni.VSwitch.CIDRBlock, -3)
   741  			result.InterfaceNumber = strconv.Itoa(alibabaCloud.GetENIIndexFromTags(eni.Tags))
   742  			return
   743  		}
   744  		return nil, fmt.Errorf("unable to find ENI %s", ipInfo.Resource)
   745  	}
   746  
   747  	return
   748  }
   749  
   750  // Allocate will attempt to find the specified IP in the custom resource and
   751  // allocate it if it is available. If the IP is unavailable or already
   752  // allocated, an error is returned. The custom resource will be updated to
   753  // reflect the newly allocated IP.
   754  func (a *crdAllocator) Allocate(ip net.IP, owner string, pool Pool) (*AllocationResult, error) {
   755  	a.mutex.Lock()
   756  	defer a.mutex.Unlock()
   757  
   758  	if _, ok := a.allocated[ip.String()]; ok {
   759  		return nil, fmt.Errorf("IP already in use")
   760  	}
   761  
   762  	ipInfo, err := a.store.allocate(ip)
   763  	if err != nil {
   764  		return nil, err
   765  	}
   766  
   767  	result, err := a.buildAllocationResult(ip, ipInfo)
   768  	if err != nil {
   769  		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
   770  	}
   771  
   772  	a.markAllocated(ip, owner, *ipInfo)
   773  	// Update custom resource to reflect the newly allocated IP.
   774  	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("allocation of IP %s", ip.String()))
   775  
   776  	return result, nil
   777  }
   778  
   779  // AllocateWithoutSyncUpstream will attempt to find the specified IP in the
   780  // custom resource and allocate it if it is available. If the IP is
   781  // unavailable or already allocated, an error is returned. The custom resource
   782  // will not be updated.
   783  func (a *crdAllocator) AllocateWithoutSyncUpstream(ip net.IP, owner string, pool Pool) (*AllocationResult, error) {
   784  	a.mutex.Lock()
   785  	defer a.mutex.Unlock()
   786  
   787  	if _, ok := a.allocated[ip.String()]; ok {
   788  		return nil, fmt.Errorf("IP already in use")
   789  	}
   790  
   791  	ipInfo, err := a.store.allocate(ip)
   792  	if err != nil {
   793  		return nil, err
   794  	}
   795  
   796  	result, err := a.buildAllocationResult(ip, ipInfo)
   797  	if err != nil {
   798  		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
   799  	}
   800  
   801  	a.markAllocated(ip, owner, *ipInfo)
   802  
   803  	return result, nil
   804  }
   805  
   806  // Release will release the specified IP or return an error if the IP has not
   807  // been allocated before. The custom resource will be updated to reflect the
   808  // released IP.
   809  func (a *crdAllocator) Release(ip net.IP, pool Pool) error {
   810  	a.mutex.Lock()
   811  	defer a.mutex.Unlock()
   812  
   813  	if _, ok := a.allocated[ip.String()]; !ok {
   814  		return fmt.Errorf("IP %s is not allocated", ip.String())
   815  	}
   816  
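        	// Releasing only removes the IP from this allocator's local map; the
        	// triggered refresh then drops it from Status.IPAM.Used in the custom
        	// resource.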
   817  	delete(a.allocated, ip.String())
   818  	// Update custom resource to reflect the newly released IP.
   819  	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("release of IP %s", ip.String()))
   820  
   821  	return nil
   822  }
   823  
   824  // markAllocated marks a particular IP as allocated
   825  func (a *crdAllocator) markAllocated(ip net.IP, owner string, ipInfo ipamTypes.AllocationIP) {
   826  	ipInfo.Owner = owner
   827  	a.allocated[ip.String()] = ipInfo
   828  }
   829  
   830  // AllocateNext allocates the next available IP as offered by the custom
   831  // resource or returns an error if no IP is available. The custom resource will
   832  // be updated to reflect the newly allocated IP.
   833  func (a *crdAllocator) AllocateNext(owner string, pool Pool) (*AllocationResult, error) {
   834  	a.mutex.Lock()
   835  	defer a.mutex.Unlock()
   836  
   837  	ip, ipInfo, err := a.store.allocateNext(a.allocated, a.family, owner)
   838  	if err != nil {
   839  		return nil, err
   840  	}
   841  
   842  	result, err := a.buildAllocationResult(ip, ipInfo)
   843  	if err != nil {
   844  		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
   845  	}
   846  
   847  	a.markAllocated(ip, owner, *ipInfo)
   848  	// Update custom resource to reflect the newly allocated IP.
   849  	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("allocation of IP %s", ip.String()))
   850  
   851  	return result, nil
   852  }
   853  
   854  // AllocateNextWithoutSyncUpstream allocates the next available IP as offered
   855  // by the custom resource or returns an error if no IP is available. The custom
   856  // resource will not be updated.
   857  func (a *crdAllocator) AllocateNextWithoutSyncUpstream(owner string, pool Pool) (*AllocationResult, error) {
   858  	a.mutex.Lock()
   859  	defer a.mutex.Unlock()
   860  
   861  	ip, ipInfo, err := a.store.allocateNext(a.allocated, a.family, owner)
   862  	if err != nil {
   863  		return nil, err
   864  	}
   865  
   866  	result, err := a.buildAllocationResult(ip, ipInfo)
   867  	if err != nil {
   868  		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
   869  	}
   870  
   871  	a.markAllocated(ip, owner, *ipInfo)
   872  
   873  	return result, nil
   874  }
   875  
   876  // Dump provides a status report and lists all allocated IP addresses
   877  func (a *crdAllocator) Dump() (map[Pool]map[string]string, string) {
   878  	a.mutex.RLock()
   879  	defer a.mutex.RUnlock()
   880  
   881  	allocs := make(map[string]string, len(a.allocated))
   882  	for ip := range a.allocated {
   883  		allocs[ip] = ""
   884  	}
   885  
   886  	status := fmt.Sprintf("%d/%d allocated", len(allocs), a.store.totalPoolSize(a.family))
   887  	return map[Pool]map[string]string{PoolDefault(): allocs}, status
   888  }
   889  
   890  func (a *crdAllocator) Capacity() uint64 {
   891  	a.mutex.RLock()
   892  	defer a.mutex.RUnlock()
   893  	return uint64(a.store.totalPoolSize(a.family))
   894  }
   895  
   896  // RestoreFinished marks the status of restoration as done
   897  func (a *crdAllocator) RestoreFinished() {
   898  	a.store.restoreCloseOnce.Do(func() {
   899  		close(a.store.restoreFinished)
   900  	})
   901  }
   902  
   903  // NewIPNotAvailableInPoolError returns an error representing the given IP not
   904  // being available in the IPAM pool.
   905  func NewIPNotAvailableInPoolError(ip net.IP) error {
   906  	return &ErrIPNotAvailableInPool{ip: ip}
   907  }
   908  
   909  // ErrIPNotAvailableInPool represents an error when an IP is not available in
   910  // the pool.
   911  type ErrIPNotAvailableInPool struct {
   912  	ip net.IP
   913  }
   914  
   915  func (e *ErrIPNotAvailableInPool) Error() string {
   916  	return fmt.Sprintf("IP %s is not available", e.ip.String())
   917  }
   918  
   919  // Is provides this error type with the logic for use with errors.Is.
   920  func (e *ErrIPNotAvailableInPool) Is(target error) bool {
   921  	if e == nil || target == nil {
   922  		return false
   923  	}
   924  	t, ok := target.(*ErrIPNotAvailableInPool)
   925  	if !ok {
   926  		return ok
   927  	}
   928  	if t == nil {
   929  		return false
   930  	}
   931  	return t.ip.Equal(e.ip)
   932  }