github.phpd.cn/cilium/cilium@v1.6.12/pkg/aws/eni/node.go

     1  // Copyright 2019 Authors of Cilium
     2  // Copyright 2017 Lyft, Inc.
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //     http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package eni
    17  
    18  import (
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/cilium/cilium/pkg/aws/types"
    24  	"github.com/cilium/cilium/pkg/defaults"
    25  	"github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    26  	"github.com/cilium/cilium/pkg/lock"
    27  	"github.com/cilium/cilium/pkg/math"
    28  	"github.com/cilium/cilium/pkg/option"
    29  	"github.com/cilium/cilium/pkg/trigger"
    30  
    31  	"github.com/aws/aws-sdk-go-v2/aws/awserr"
    32  	"github.com/sirupsen/logrus"
    33  )
    34  
    35  const (
     36  	// warningInterval is the interval at which a warning is repeated
     37  	// while the underlying condition persists.
    38  	warningInterval = time.Hour
    39  
    40  	// maxAttachRetries is the maximum number of attachment retries
    41  	maxAttachRetries = 5
    42  )
    43  
    44  // Node represents a Kubernetes node running Cilium with an associated
    45  // CiliumNode custom resource
    46  type Node struct {
    47  	// mutex protects all members of this structure
    48  	mutex lock.RWMutex
    49  
    50  	// name is the name of the node
    51  	name string
    52  
    53  	// resource is the link to the CiliumNode custom resource
    54  	resource *v2.CiliumNode
    55  
    56  	// stats provides accounting for various per node statistics
    57  	stats nodeStatistics
    58  
    59  	// lastMaxAdapterWarning is the timestamp when the last warning was
    60  	// printed that this node is out of adapters
    61  	lastMaxAdapterWarning time.Time
    62  
    63  	// instanceNotRunning is true when the EC2 instance backing the node is
    64  	// not running. This state is detected based on error messages returned
    65  	// when modifying instance state
    66  	instanceNotRunning bool
    67  
    68  	// waitingForPoolMaintenance is true when the node is subject to an
    69  	// IP allocation or release which must be performed before another
    70  	// allocation or release can be attempted
    71  	waitingForPoolMaintenance bool
    72  
    73  	// resyncNeeded is set to the current time when a resync with the EC2
    74  	// API is required. The timestamp is required to ensure that this is
    75  	// only reset if the resync started after the time stored in
    76  	// resyncNeeded. This is needed because resyncs and allocations happen
    77  	// in parallel.
    78  	resyncNeeded time.Time
    79  
    80  	enis map[string]v2.ENI
    81  
    82  	available map[string]v2.AllocationIP
    83  
    84  	manager *NodeManager
    85  
     86  	// poolMaintainer is the trigger used to assign/unassign private IP
     87  	// addresses of this node. It ensures that multiple requests to operate
     88  	// on private IPs are batched together while pool maintenance is still
     89  	// ongoing (see the illustrative trigger sketch after this struct).
    90  	poolMaintainer *trigger.Trigger
    91  
    92  	// k8sSync is the trigger used to synchronize node information with the
    93  	// K8s apiserver. The trigger is used to batch multiple updates
    94  	// together if the apiserver is slow to respond or subject to rate
    95  	// limiting.
    96  	k8sSync *trigger.Trigger
    97  }
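         // Illustrative only, not part of the original file: the two triggers
         // above are created when the node is registered with the NodeManager.
         // A minimal sketch, assuming the Name, MinInterval and TriggerFunc
         // fields of Cilium's pkg/trigger Parameters, could look like this:
         //
         //	poolMaintainer, err := trigger.NewTrigger(trigger.Parameters{
         //		Name:        "eni-pool-maintainer", // hypothetical trigger name
         //		MinInterval: time.Second,           // batch rapid requests together
         //		TriggerFunc: func(reasons []string) {
         //			// one MaintainIpPool() run serves all requests queued
         //			// while a previous run was still in progress
         //		},
         //	})
         //	if err == nil {
         //		poolMaintainer.Trigger() // coalesced with other pending triggers
         //	}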
    98  
    99  type nodeStatistics struct {
   100  	// usedIPs is the number of IPs currently in use
   101  	usedIPs int
   102  
   103  	// availableIPs is the number of IPs currently available for allocation
   104  	// by the node
   105  	availableIPs int
   106  
   107  	// neededIPs is the number of IPs needed to reach the PreAllocate
    108  	// watermark
   109  	neededIPs int
   110  
   111  	// excessIPs is the number of free IPs exceeding MaxAboveWatermark
   112  	excessIPs int
   113  
    114  	// remainingInterfaces is the number of ENIs that can still be
    115  	// allocated, plus the number of attached ENIs that have not yet
    116  	// exhausted their ENI-specific address quota
   117  	remainingInterfaces int
   118  }
   119  
   120  func (n *Node) logger() *logrus.Entry {
   121  	if n == nil {
   122  		return log
   123  	}
   124  
   125  	n.mutex.RLock()
   126  	defer n.mutex.RUnlock()
   127  
   128  	return n.loggerLocked()
   129  }
   130  
   131  func (n *Node) loggerLocked() *logrus.Entry {
   132  	if n == nil {
   133  		return log
   134  	}
   135  
   136  	logger := log.WithField(fieldName, n.name)
   137  
   138  	if n.resource != nil {
   139  		logger = logger.WithField("instanceID", n.resource.Spec.ENI.InstanceID)
   140  	}
   141  
   142  	return logger
   143  }
   144  
   145  func (n *Node) getNeededAddresses() int {
   146  	n.mutex.RLock()
   147  	defer n.mutex.RUnlock()
   148  
   149  	if n.stats.neededIPs > 0 {
   150  		return n.stats.neededIPs
   151  	}
   152  	if option.Config.AwsReleaseExcessIps && n.stats.excessIPs > 0 {
    153  		// Nodes are sorted by needed addresses; return the negative of
    154  		// excessIPs so that nodes with an IP deficit are resolved first.
   155  		return n.stats.excessIPs * -1
   156  	}
   157  	return 0
   158  }
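         // Illustrative only: a node that still needs 4 IPs returns 4, a node
         // whose pool is satisfied returns 0, and (with AwsReleaseExcessIps
         // enabled) a node holding 2 excess IPs returns -2, so ordering nodes
         // by this value handles IP deficits before excess release.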
   159  
   160  func calculateNeededIPs(availableIPs, usedIPs, preAllocate, minAllocate int) (neededIPs int) {
   161  	if preAllocate == 0 {
   162  		preAllocate = defaults.ENIPreAllocation
   163  	}
   164  
   165  	neededIPs = preAllocate - (availableIPs - usedIPs)
   166  	if neededIPs < 0 {
   167  		neededIPs = 0
   168  	}
   169  
   170  	if minAllocate > 0 {
   171  		neededIPs = math.IntMax(neededIPs, minAllocate-availableIPs)
   172  	}
   173  
   174  	return
   175  }
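         // Illustrative arithmetic, not part of the original file:
         //
         //	calculateNeededIPs(10, 8, 8, 0)  // 8 - (10-8) = 6 IPs needed to restore the watermark
         //	calculateNeededIPs(20, 4, 8, 0)  // 8 - (20-4) is negative, clamped to 0
         //	calculateNeededIPs(0, 0, 8, 16)  // max(8, 16-0) = 16, min-allocate dominates on bootstrap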
   176  
   177  func calculateExcessIPs(availableIPs, usedIPs, preAllocate, minAllocate, maxAboveWatermark int) (excessIPs int) {
   178  	if preAllocate == 0 {
   179  		preAllocate = defaults.ENIPreAllocation
   180  	}
   181  
    182  	// Keep availableIPs above minAllocate + maxAboveWatermark as long as
    183  	// the initial allotment of min-allocate + max-above-watermark has not
    184  	// been used up yet. This is the maximum potential allocation that will
    185  	// happen on initial bootstrap. Depending on interface restrictions,
    186  	// the actual allocation may be below this, but we always want to avoid
    187  	// releasing IPs that have just been allocated.
   188  	if usedIPs <= (minAllocate + maxAboveWatermark) {
   189  		if availableIPs <= (minAllocate + maxAboveWatermark) {
   190  			return 0
   191  		}
   192  	}
   193  
   194  	// Once above the minimum allocation level, calculate based on
   195  	// pre-allocation limit with the max-above-watermark limit calculated
   196  	// in. This is again a best-effort calculation, depending on the
   197  	// interface restrictions, less than max-above-watermark may have been
   198  	// allocated but we never want to release IPs that have been allocated
   199  	// because of max-above-watermark.
   200  	excessIPs = availableIPs - usedIPs - preAllocate - maxAboveWatermark
   201  	if excessIPs < 0 {
   202  		excessIPs = 0
   203  	}
   204  
   205  	return
   206  }
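         // Illustrative arithmetic, not part of the original file:
         //
         //	calculateExcessIPs(14, 4, 8, 0, 0)  // 14 - 4 - 8 - 0 = 2 IPs can be released
         //	calculateExcessIPs(10, 2, 8, 8, 2)  // still within min-allocate + max-above-watermark, returns 0
         //	calculateExcessIPs(8, 6, 8, 0, 0)   // 8 - 6 - 8 is negative, clamped to 0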
   207  
   208  func (n *Node) updatedResource(resource *v2.CiliumNode) bool {
   209  	n.mutex.Lock()
   210  	// Any modification to the custom resource is seen as a sign that the
   211  	// instance is alive
   212  	if n.instanceNotRunning {
   213  		n.loggerLocked().Info("Marking node as running")
   214  		n.instanceNotRunning = false
   215  	}
   216  	n.resource = resource
   217  	n.recalculateLocked()
   218  	allocationNeeded := n.allocationNeeded()
   219  	if allocationNeeded {
   220  		n.waitingForPoolMaintenance = true
   221  		n.poolMaintainer.Trigger()
   222  	}
   223  	n.mutex.Unlock()
   224  
   225  	return allocationNeeded
   226  }
   227  
   228  func (n *Node) recalculateLocked() {
   229  	n.enis = map[string]v2.ENI{}
   230  	n.available = map[string]v2.AllocationIP{}
   231  	for _, e := range n.manager.instancesAPI.GetENIs(n.resource.Spec.ENI.InstanceID) {
   232  		n.enis[e.ID] = *e
   233  
   234  		if e.Number < n.resource.Spec.ENI.FirstInterfaceIndex {
   235  			continue
   236  		}
   237  
   238  		for _, ip := range e.Addresses {
   239  			n.available[ip] = v2.AllocationIP{Resource: e.ID}
   240  		}
   241  	}
   242  	n.stats.usedIPs = len(n.resource.Status.IPAM.Used)
   243  	n.stats.availableIPs = len(n.available)
   244  	n.stats.neededIPs = calculateNeededIPs(n.stats.availableIPs, n.stats.usedIPs, n.resource.Spec.ENI.PreAllocate, n.resource.Spec.ENI.MinAllocate)
   245  	n.stats.excessIPs = calculateExcessIPs(n.stats.availableIPs, n.stats.usedIPs, n.resource.Spec.ENI.PreAllocate, n.resource.Spec.ENI.MinAllocate, n.resource.Spec.ENI.MaxAboveWatermark)
   246  
   247  	n.loggerLocked().WithFields(logrus.Fields{
   248  		"available":                 n.stats.availableIPs,
   249  		"used":                      n.stats.usedIPs,
   250  		"toAlloc":                   n.stats.neededIPs,
   251  		"toRelease":                 n.stats.excessIPs,
   252  		"waitingForPoolMaintenance": n.waitingForPoolMaintenance,
   253  		"resyncNeeded":              n.resyncNeeded,
   254  	}).Debug("Recalculated needed addresses")
   255  }
   256  
   257  // allocationNeeded returns true if this node requires IPs to be allocated
   258  func (n *Node) allocationNeeded() bool {
   259  	return !n.waitingForPoolMaintenance && n.resyncNeeded.IsZero() && n.stats.neededIPs > 0
   260  }
   261  
   262  // releaseNeeded returns true if this node requires IPs to be released
   263  func (n *Node) releaseNeeded() bool {
   264  	return option.Config.AwsReleaseExcessIps && !n.waitingForPoolMaintenance && n.resyncNeeded.IsZero() && n.stats.excessIPs > 0
   265  }
   266  
   267  // ENIs returns a copy of all ENIs attached to the node
   268  func (n *Node) ENIs() (enis map[string]v2.ENI) {
   269  	enis = map[string]v2.ENI{}
   270  	n.mutex.RLock()
   271  	for _, e := range n.enis {
   272  		enis[e.ID] = e
   273  	}
   274  	n.mutex.RUnlock()
   275  	return
   276  }
   277  
   278  // Pool returns the IP allocation pool available to the node
   279  func (n *Node) Pool() (pool map[string]v2.AllocationIP) {
   280  	pool = map[string]v2.AllocationIP{}
   281  	n.mutex.RLock()
   282  	for k, allocationIP := range n.available {
   283  		pool[k] = allocationIP
   284  	}
   285  	n.mutex.RUnlock()
   286  	return
   287  }
   288  
   289  // ResourceCopy returns a deep copy of the CiliumNode custom resource
   290  // associated with the node
   291  func (n *Node) ResourceCopy() *v2.CiliumNode {
   292  	n.mutex.RLock()
   293  	defer n.mutex.RUnlock()
   294  	return n.resource.DeepCopy()
   295  }
   296  
   297  func (n *Node) getSecurityGroups() (securityGroups []string) {
   298  	// When no security groups are provided, derive them from eth0
   299  	securityGroups = n.resource.Spec.ENI.SecurityGroups
   300  	if len(securityGroups) == 0 {
   301  		if eni := n.manager.instancesAPI.GetENI(n.resource.Spec.ENI.InstanceID, 0); eni != nil {
   302  			securityGroups = eni.SecurityGroups
   303  		}
   304  	}
   305  	return
   306  }
   307  
   308  func (n *Node) errorInstanceNotRunning(err error) (notRunning bool) {
    309  	// This handles the special case where an instance has been terminated
    310  	// but the grace period has delayed the Kubernetes node deletion event,
    311  	// so it has not been sent out yet. The next ENI resync will cause the
    312  	// instance to be marked as inactive.
   313  	notRunning = strings.Contains(err.Error(), "is not 'running'")
   314  	if notRunning {
   315  		n.mutex.Lock()
   316  		n.instanceNotRunning = true
   317  		n.loggerLocked().Info("Marking node as not running")
   318  		n.mutex.Unlock()
   319  	}
   320  	return
   321  }
   322  
   323  func isAttachmentIndexConflict(err error) bool {
   324  	e, ok := err.(awserr.Error)
   325  	return ok && e.Code() == "InvalidParameterValue" && strings.Contains(e.Message(), "interface attached at device")
   326  }
   327  
    328  // indexExists returns true if the specified index is occupied by an ENI in the
    329  // provided map of ENIs
   330  func indexExists(enis map[string]v2.ENI, index int64) bool {
   331  	for _, e := range enis {
   332  		if e.Number == int(index) {
   333  			return true
   334  		}
   335  	}
   336  	return false
   337  }
   338  
   339  // findNextIndex returns the next available index with the provided index being
   340  // the first candidate
   341  func (n *Node) findNextIndex(index int64) int64 {
   342  	for indexExists(n.enis, index) {
   343  		index++
   344  	}
   345  	return index
   346  }
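         // Illustrative only: if the node currently tracks ENIs at indices 0 and
         // 1, findNextIndex(0) returns 2. Should the local ENI cache be stale and
         // the chosen index already be in use on EC2, AttachNetworkInterface
         // fails with an "InvalidParameterValue" error mentioning "interface
         // attached at device"; isAttachmentIndexConflict detects this and
         // allocateENI retries with the next free index instead of deleting the
         // freshly created ENI.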
   347  
    348  // allocateENI creates an additional ENI and attaches it to the instance as
    349  // specified by the CiliumNode. Secondary IPs are assigned to the interface to
    350  // cover neededAddresses, up to the maximum number of addresses allowed by the
    351  // ENI.
   352  func (n *Node) allocateENI(s *types.Subnet, a *allocatableResources) error {
   353  	nodeResource := n.ResourceCopy()
   354  	n.mutex.RLock()
   355  	securityGroups := n.getSecurityGroups()
   356  	neededAddresses := n.stats.neededIPs
   357  
   358  	desc := "Cilium-CNI (" + n.resource.Spec.ENI.InstanceID + ")"
   359  	toAllocate := int64(math.IntMin(neededAddresses+nodeResource.Spec.ENI.MaxAboveWatermark, a.limits.IPv4))
    360  	// Validate whether the request has already been fulfilled in the meantime
   361  	if toAllocate == 0 {
   362  		n.mutex.RUnlock()
   363  		return nil
   364  	}
   365  
   366  	index := n.findNextIndex(int64(nodeResource.Spec.ENI.FirstInterfaceIndex))
   367  
   368  	scopedLog := n.loggerLocked().WithFields(logrus.Fields{
   369  		"securityGroups": securityGroups,
   370  		"subnetID":       s.ID,
   371  		"addresses":      toAllocate,
   372  	})
   373  	scopedLog.Info("No more IPs available, creating new ENI")
   374  	n.mutex.RUnlock()
   375  
   376  	eniID, eni, err := n.manager.ec2API.CreateNetworkInterface(toAllocate, s.ID, desc, securityGroups)
   377  	if err != nil {
   378  		n.manager.metricsAPI.IncENIAllocationAttempt("ENI creation failed", s.ID)
   379  		return fmt.Errorf("unable to create ENI: %s", err)
   380  	}
   381  
   382  	scopedLog = scopedLog.WithField(fieldEniID, eniID)
   383  	scopedLog.Info("Created new ENI")
   384  
   385  	var attachmentID string
   386  	for attachRetries := 0; attachRetries < maxAttachRetries; attachRetries++ {
   387  		attachmentID, err = n.manager.ec2API.AttachNetworkInterface(index, nodeResource.Spec.ENI.InstanceID, eniID)
   388  
    389  		// The index is already in use; this can happen if the local
    390  		// list of ENIs is outdated. Retry the attachment to avoid
    391  		// having to delete the ENI.
   392  		if !isAttachmentIndexConflict(err) {
   393  			break
   394  		}
   395  
   396  		index = n.findNextIndex(index + 1)
   397  	}
   398  
   399  	if err != nil {
   400  		delErr := n.manager.ec2API.DeleteNetworkInterface(eniID)
   401  		if delErr != nil {
   402  			scopedLog.WithError(delErr).Warning("Unable to undo ENI creation after failure to attach")
   403  		}
   404  
   405  		if n.errorInstanceNotRunning(err) {
   406  			return nil
   407  		}
   408  
   409  		n.manager.metricsAPI.IncENIAllocationAttempt("ENI attachment failed", s.ID)
   410  
   411  		return fmt.Errorf("unable to attach ENI at index %d: %s", index, err)
   412  	}
   413  
   414  	scopedLog = scopedLog.WithFields(logrus.Fields{
   415  		"attachmentID": attachmentID,
   416  		"index":        index,
   417  	})
   418  
   419  	eni.Number = int(index)
   420  
   421  	scopedLog.Info("Attached ENI to instance")
   422  
   423  	if nodeResource.Spec.ENI.DeleteOnTermination {
    424  		// We have an attachment ID from the last API call, which lets us mark
    425  		// the interface as delete-on-termination.
   426  		err = n.manager.ec2API.ModifyNetworkInterface(eniID, attachmentID, n.resource.Spec.ENI.DeleteOnTermination)
   427  		if err != nil {
   428  			delErr := n.manager.ec2API.DeleteNetworkInterface(eniID)
   429  			if delErr != nil {
   430  				scopedLog.WithError(delErr).Warning("Unable to undo ENI creation after failure to attach")
   431  			}
   432  
   433  			if n.errorInstanceNotRunning(err) {
   434  				return nil
   435  			}
   436  
   437  			n.manager.metricsAPI.IncENIAllocationAttempt("ENI modification failed", s.ID)
   438  			return fmt.Errorf("unable to mark ENI for deletion on termination: %s", err)
   439  		}
   440  	}
   441  
   442  	// Add the information of the created ENI to the instances manager
   443  	n.manager.instancesAPI.UpdateENI(n.resource.Spec.ENI.InstanceID, eni)
   444  
   445  	n.manager.metricsAPI.IncENIAllocationAttempt("success", s.ID)
   446  	n.manager.metricsAPI.AddIPAllocation(s.ID, toAllocate)
   447  
   448  	return nil
   449  }
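         // To summarize the flow above: CreateNetworkInterface in the selected
         // subnet, AttachNetworkInterface at the first free device index
         // (retrying on index conflicts), optionally ModifyNetworkInterface to
         // set delete-on-termination, and finally UpdateENI so that the
         // instances manager and metrics reflect the new interface. Any
         // attach/modify failure triggers a best-effort DeleteNetworkInterface
         // to undo the creation.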
   450  
   451  // allocatableResources represents the resources available for allocation for a
   452  // particular ciliumNode. If an existing ENI has IP allocation capacity left,
   453  // that capacity is used up first. If not, an available index is found to
   454  // create a new ENI.
   455  type allocatableResources struct {
   456  	instanceID          string
   457  	eni                 string
   458  	subnet              *types.Subnet
   459  	availableOnSubnet   int
   460  	limits              Limits
   461  	remainingInterfaces int
   462  	totalENIs           int
   463  	ipsToReleaseOnENI   []string
   464  }
   465  
   466  func (n *Node) determineMaintenanceAction() (*allocatableResources, error) {
   467  	n.mutex.Lock()
   468  	defer n.mutex.Unlock()
   469  
   470  	instanceType := n.resource.Spec.ENI.InstanceType
   471  	limits, ok := GetLimits(instanceType)
   472  
   473  	scopedLog := n.loggerLocked()
   474  
   475  	if !ok {
   476  		n.manager.metricsAPI.IncENIAllocationAttempt("limits unavailable", "")
   477  		return nil, fmt.Errorf("Unable to determine limits of instance type '%s'", instanceType)
   478  	}
   479  
   480  	a := &allocatableResources{
   481  		instanceID: n.resource.Spec.ENI.InstanceID,
   482  		limits:     limits,
   483  		totalENIs:  len(n.enis),
   484  	}
   485  
    486  	// Validate that the node still requires addresses to be released; the
    487  	// request may have been resolved in the meantime.
   488  	if option.Config.AwsReleaseExcessIps && n.stats.excessIPs > 0 {
   489  		// Iterate over ENIs on this node, select the ENI with the most
   490  		// addresses available for release
   491  		for key, e := range n.enis {
   492  			scopedLog.WithFields(logrus.Fields{
   493  				fieldEniID:     e.ID,
   494  				"needIndex":    n.resource.Spec.ENI.FirstInterfaceIndex,
   495  				"index":        e.Number,
   496  				"addressLimit": a.limits.IPv4,
   497  				"numAddresses": len(e.Addresses),
   498  			}).Debug("Considering ENI for IP release")
   499  
   500  			if e.Number < n.resource.Spec.ENI.FirstInterfaceIndex {
   501  				continue
   502  			}
   503  
   504  			// Count free IP addresses on this ENI
   505  			ipsOnENI := n.resource.Status.ENI.ENIs[e.ID].Addresses
   506  			freeIpsOnENI := []string{}
   507  			for _, ip := range ipsOnENI {
   508  				_, ipUsed := n.resource.Status.IPAM.Used[ip]
   509  				// exclude primary IPs
   510  				if !ipUsed && ip != e.IP {
   511  					freeIpsOnENI = append(freeIpsOnENI, ip)
   512  				}
   513  			}
   514  			freeOnENICount := len(freeIpsOnENI)
   515  
   516  			if freeOnENICount <= 0 {
   517  				continue
   518  			}
   519  
   520  			scopedLog.WithFields(logrus.Fields{
   521  				fieldEniID:       e.ID,
   522  				"excessIPs":      n.stats.excessIPs,
   523  				"freeOnENICount": freeOnENICount,
   524  			}).Debug("ENI has unused IPs that can be released")
   525  			maxReleaseOnENI := math.IntMin(freeOnENICount, n.stats.excessIPs)
   526  
   527  			firstEniWithFreeIpFound := a.ipsToReleaseOnENI == nil
   528  			eniWithMoreFreeIpsFound := maxReleaseOnENI > len(a.ipsToReleaseOnENI)
   529  			// Select the ENI with the most addresses available for release
   530  			if firstEniWithFreeIpFound || eniWithMoreFreeIpsFound {
   531  				a.eni = key
   532  				a.subnet = &types.Subnet{ID: e.Subnet.ID}
   533  				a.ipsToReleaseOnENI = freeIpsOnENI[:maxReleaseOnENI]
   534  			}
   535  		}
   536  
   537  		if a.ipsToReleaseOnENI != nil {
   538  			scopedLog = scopedLog.WithFields(logrus.Fields{
   539  				"available":      n.stats.availableIPs,
   540  				"used":           n.stats.usedIPs,
   541  				"excess":         n.stats.excessIPs,
   542  				"releasing":      a.ipsToReleaseOnENI,
   543  				"selectedENI":    n.enis[a.eni],
   544  				"selectedSubnet": a.subnet.ID,
   545  			})
   546  			scopedLog.Info("Releasing excess IPs from node")
   547  		}
   548  		return a, nil
   549  	}
   550  
    551  	// Validate that the node still requires addresses to be allocated; the
    552  	// request may have been resolved in the meantime.
   553  	maxAllocate := n.stats.neededIPs + n.resource.Spec.ENI.MaxAboveWatermark
   554  	if n.stats.neededIPs == 0 {
   555  		return nil, nil
   556  	}
   557  
   558  	for key, e := range n.enis {
   559  		scopedLog.WithFields(logrus.Fields{
   560  			fieldEniID:     e.ID,
   561  			"needIndex":    n.resource.Spec.ENI.FirstInterfaceIndex,
   562  			"index":        e.Number,
   563  			"addressLimit": a.limits.IPv4,
   564  			"numAddresses": len(e.Addresses),
   565  		}).Debug("Considering ENI for allocation")
   566  
   567  		if e.Number < n.resource.Spec.ENI.FirstInterfaceIndex {
   568  			continue
   569  		}
   570  
   571  		availableOnENI := math.IntMax(limits.IPv4-len(e.Addresses), 0)
   572  		if availableOnENI <= 0 {
   573  			continue
   574  		} else {
   575  			a.remainingInterfaces++
   576  		}
   577  
   578  		scopedLog.WithFields(logrus.Fields{
   579  			fieldEniID:       e.ID,
   580  			"maxAllocate":    maxAllocate,
   581  			"availableOnEni": availableOnENI,
   582  		}).Debug("ENI has IPs available")
   583  		maxAllocateOnENI := math.IntMin(availableOnENI, maxAllocate)
   584  
   585  		if subnet := n.manager.instancesAPI.GetSubnet(e.Subnet.ID); subnet != nil {
   586  			if subnet.AvailableAddresses > 0 && a.eni == "" {
   587  				scopedLog.WithFields(logrus.Fields{
   588  					"subnetID":           e.Subnet.ID,
   589  					"availableAddresses": subnet.AvailableAddresses,
   590  				}).Debug("Subnet has IPs available")
   591  				a.eni = key
   592  				a.subnet = subnet
   593  				a.availableOnSubnet = math.IntMin(subnet.AvailableAddresses, maxAllocateOnENI)
   594  			}
   595  		}
   596  	}
   597  
   598  	a.remainingInterfaces = limits.Adapters - a.totalENIs + a.remainingInterfaces
   599  	n.stats.remainingInterfaces = a.remainingInterfaces
   600  
   601  	scopedLog = scopedLog.WithFields(logrus.Fields{
   602  		"available":           n.stats.availableIPs,
   603  		"used":                n.stats.usedIPs,
   604  		"toAlloc":             n.stats.neededIPs,
   605  		"remainingInterfaces": n.stats.remainingInterfaces,
   606  	})
   607  
   608  	if a.eni != "" {
   609  		scopedLog = scopedLog.WithFields(logrus.Fields{
   610  			"selectedENI":          n.enis[a.eni],
   611  			"selectedSubnet":       a.subnet.ID,
   612  			"availableIPsOnSubnet": a.subnet.AvailableAddresses,
   613  		})
   614  	}
   615  
   616  	scopedLog.Info("Resolving IP deficit of node")
   617  
   618  	return a, nil
   619  }
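         // Illustrative scenario, not part of the original file: with
         // excessIPs=2 and two eligible ENIs holding 3 and 1 unused secondary
         // IPs respectively, the release branch above selects the ENI with 3
         // free addresses and marks min(3, 2) = 2 of them in ipsToReleaseOnENI.
         // In the allocation branch, the first eligible ENI with spare capacity
         // whose subnet still has addresses is selected, and availableOnSubnet
         // is capped at min(subnet.AvailableAddresses, availableOnENI,
         // neededIPs+MaxAboveWatermark).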
   620  
   621  func (n *Node) prepareENICreation(a *allocatableResources) (*types.Subnet, error) {
   622  	n.mutex.Lock()
   623  	defer n.mutex.Unlock()
   624  
   625  	if a.remainingInterfaces == 0 {
    626  		// This is not a failure scenario; warn once per hour but do
    627  		// not track it as an ENI allocation failure. There is a
    628  		// separate metric to track nodes running at capacity.
   629  		if time.Since(n.lastMaxAdapterWarning) > warningInterval {
   630  			n.loggerLocked().WithFields(logrus.Fields{
   631  				"max":       a.limits.Adapters,
   632  				"allocated": a.totalENIs,
   633  			}).Warning("Instance is out of ENIs")
   634  			n.lastMaxAdapterWarning = time.Now()
   635  		}
   636  		return nil, nil
   637  	}
   638  
   639  	bestSubnet := n.manager.instancesAPI.FindSubnetByTags(n.resource.Spec.ENI.VpcID, n.resource.Spec.ENI.AvailabilityZone, n.resource.Spec.ENI.SubnetTags)
   640  	if bestSubnet == nil {
   641  		n.manager.metricsAPI.IncENIAllocationAttempt("no available subnet", "")
    642  		return nil, fmt.Errorf("No matching subnet available for ENI creation (VPC=%s AZ=%s SubnetTags=%s)",
    643  			n.resource.Spec.ENI.VpcID, n.resource.Spec.ENI.AvailabilityZone, n.resource.Spec.ENI.SubnetTags)
   644  	}
   645  
   646  	return bestSubnet, nil
   647  }
   648  
    649  // maintainIpPool attempts to allocate or release IPs as required to close the
    650  // current deficit or excess. If required, ENIs are created.
   651  func (n *Node) maintainIpPool() error {
   652  	a, err := n.determineMaintenanceAction()
   653  	if err != nil {
   654  		return err
   655  	}
   656  
   657  	// Maintenance request has already been fulfilled
   658  	if a == nil {
   659  		return nil
   660  	}
   661  
   662  	scopedLog := n.logger()
   663  
   664  	// Release excess addresses
   665  	if a.ipsToReleaseOnENI != nil {
   666  		err := n.manager.ec2API.UnassignPrivateIpAddresses(n.enis[a.eni].ID, a.ipsToReleaseOnENI)
   667  		if err == nil {
    668  			n.manager.metricsAPI.AddIPRelease(a.subnet.ID, int64(len(a.ipsToReleaseOnENI)))
   669  			return nil
   670  		}
   671  		n.manager.metricsAPI.IncENIAllocationAttempt("ip unassignment failed", a.subnet.ID)
   672  		scopedLog.WithFields(logrus.Fields{
   673  			fieldEniID:           n.enis[a.eni].ID,
   674  			"releasingAddresses": a.ipsToReleaseOnENI,
   675  		}).WithError(err).Warning("Unable to unassign private IPs from ENI")
   676  		return err
   677  	}
   678  
   679  	// Assign needed addresses
   680  	if a.subnet != nil && a.availableOnSubnet > 0 {
   681  		err := n.manager.ec2API.AssignPrivateIpAddresses(n.enis[a.eni].ID, int64(a.availableOnSubnet))
   682  		if err == nil {
   683  			n.manager.metricsAPI.IncENIAllocationAttempt("success", a.subnet.ID)
   684  			n.manager.metricsAPI.AddIPAllocation(a.subnet.ID, int64(a.availableOnSubnet))
   685  			return nil
   686  		}
   687  
   688  		n.manager.metricsAPI.IncENIAllocationAttempt("ip assignment failed", a.subnet.ID)
   689  		scopedLog.WithFields(logrus.Fields{
   690  			fieldEniID:           n.enis[a.eni].ID,
   691  			"requestedAddresses": a.availableOnSubnet,
   692  		}).WithError(err).Warning("Unable to assign additional private IPs to ENI, will create new ENI")
   693  	}
   694  
   695  	bestSubnet, err := n.prepareENICreation(a)
   696  	if err != nil {
   697  		return err
   698  	}
   699  
   700  	// Out of ENIs
   701  	if bestSubnet == nil {
   702  		return nil
   703  	}
   704  
   705  	return n.allocateENI(bestSubnet, a)
   706  }
   707  
    708  // MaintainIpPool attempts to allocate or release IPs as required to close the
    709  // current deficit or excess. If required, ENIs are created.
   710  func (n *Node) MaintainIpPool() error {
   711  	// If the instance is no longer running, don't attempt any deficit
   712  	// resolution and wait for the custom resource to be updated as a sign
   713  	// of life.
   714  	n.mutex.RLock()
   715  	if n.instanceNotRunning {
   716  		n.mutex.RUnlock()
   717  		return nil
   718  	}
   719  	n.mutex.RUnlock()
   720  
   721  	err := n.maintainIpPool()
   722  	n.mutex.Lock()
   723  	if err == nil {
   724  		n.loggerLocked().Debug("Setting resync needed")
   725  		n.resyncNeeded = time.Now()
   726  	}
   727  	n.recalculateLocked()
   728  	n.waitingForPoolMaintenance = false
   729  	n.mutex.Unlock()
   730  	n.manager.resyncTrigger.Trigger()
   731  	return err
   732  }
   733  
   734  // SyncToAPIServer is called to synchronize the node content with the custom
   735  // resource in the apiserver
   736  func (n *Node) SyncToAPIServer() (err error) {
   737  	var updatedNode *v2.CiliumNode
   738  
   739  	scopedLog := n.logger()
   740  	scopedLog.Debug("Refreshing node")
   741  
   742  	node := n.ResourceCopy()
   743  	origNode := node.DeepCopy()
   744  
   745  	// Always update the status first to ensure that the ENI information is
   746  	// synced for all addresses that are marked as available.
   747  	//
    748  	// Two attempts are made in case the local resource is outdated. If the
    749  	// second attempt fails as well, we are likely under heavy contention;
    750  	// fall back to the controller-based background interval to retry.
   751  	for retry := 0; retry < 2; retry++ {
   752  		if node.Status.IPAM.Used == nil {
   753  			node.Status.IPAM.Used = map[string]v2.AllocationIP{}
   754  		}
   755  
   756  		node.Status.ENI.ENIs = n.ENIs()
   757  
   758  		scopedLog.WithFields(logrus.Fields{
   759  			"numENIs":      len(node.Status.ENI.ENIs),
   760  			"allocatedIPs": len(node.Status.IPAM.Used),
   761  		}).Debug("Updating status of node in apiserver")
   762  
   763  		updatedNode, err = n.manager.k8sAPI.UpdateStatus(node, origNode)
   764  		if updatedNode != nil && updatedNode.Name != "" {
   765  			node = updatedNode.DeepCopy()
   766  			if err == nil {
   767  				break
   768  			}
   769  		} else if err != nil {
   770  			node, err = n.manager.k8sAPI.Get(node.Name)
   771  			if err != nil {
   772  				break
   773  			}
   774  			node = node.DeepCopy()
   775  			origNode = node.DeepCopy()
   776  		} else {
   777  			break
   778  		}
   779  	}
   780  
   781  	if err != nil {
   782  		scopedLog.WithError(err).Warning("Unable to update CiliumNode status")
   783  		return err
   784  	}
   785  
   786  	for retry := 0; retry < 2; retry++ {
   787  		if node.Spec.IPAM.Pool == nil {
   788  			node.Spec.IPAM.Pool = map[string]v2.AllocationIP{}
   789  		}
   790  
   791  		if node.Spec.ENI.PreAllocate == 0 {
   792  			node.Spec.ENI.PreAllocate = defaults.ENIPreAllocation
   793  		}
   794  
   795  		node.Spec.IPAM.Pool = n.Pool()
   796  
   797  		scopedLog.WithField("poolSize", len(node.Spec.IPAM.Pool)).Debug("Updating node in apiserver")
   798  
   799  		updatedNode, err = n.manager.k8sAPI.Update(node, origNode)
   800  		if updatedNode != nil && updatedNode.Name != "" {
   801  			node = updatedNode.DeepCopy()
   802  			if err == nil {
   803  				break
   804  			}
   805  		} else if err != nil {
   806  			node, err = n.manager.k8sAPI.Get(node.Name)
   807  			if err != nil {
   808  				break
   809  			}
   810  			node = node.DeepCopy()
   811  			origNode = node.DeepCopy()
   812  		} else {
   813  			break
   814  		}
   815  	}
   816  
   817  	if err != nil {
   818  		scopedLog.WithError(err).Warning("Unable to update CiliumNode spec")
   819  	}
   820  
   821  	return err
   822  }