github.com/cilium/cilium@v1.16.2/pkg/ipam/node_manager_test.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package ipam

import (
	"context"
	"fmt"
	"sync"
	"testing"
	"time"

	"github.com/sirupsen/logrus"
	"github.com/stretchr/testify/require"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	operatorOption "github.com/cilium/cilium/operator/option"
	"github.com/cilium/cilium/pkg/defaults"
	metricsmock "github.com/cilium/cilium/pkg/ipam/metrics/mock"
	ipamStats "github.com/cilium/cilium/pkg/ipam/stats"
	ipamTypes "github.com/cilium/cilium/pkg/ipam/types"
	v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/math"
	"github.com/cilium/cilium/pkg/testutils"
	testipam "github.com/cilium/cilium/pkg/testutils/ipam"
)

var (
	k8sapi = &k8sMock{}
)

const testPoolID = ipamTypes.PoolID("global")

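// allocationImplementationMock is an in-memory stand-in for the IPAM
// allocation backend consumed by NewNodeManager. It hands out IPs from a
// single fake pool (testPoolID) so the tests below can exercise NodeManager
// logic without a real cloud provider.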
type allocationImplementationMock struct {
	// mutex protects all fields of this structure
	mutex        lock.RWMutex
	poolSize     int
	allocatedIPs int
	ipGenerator  int
}

func newAllocationImplementationMock() *allocationImplementationMock {
	return &allocationImplementationMock{poolSize: 2048}
}

func (a *allocationImplementationMock) CreateNode(obj *v2.CiliumNode, node *Node) NodeOperations {
	return &nodeOperationsMock{allocator: a}
}

func (a *allocationImplementationMock) GetPoolQuota() ipamTypes.PoolQuotaMap {
	a.mutex.RLock()
	defer a.mutex.RUnlock()
	return ipamTypes.PoolQuotaMap{
		testPoolID: ipamTypes.PoolQuota{AvailableIPs: a.poolSize - a.allocatedIPs},
	}
}

func (a *allocationImplementationMock) Resync(ctx context.Context) time.Time {
	return time.Now()
}

func (a *allocationImplementationMock) InstanceSync(ctx context.Context, instanceID string) time.Time {
	return time.Now()
}

func (a *allocationImplementationMock) HasInstance(instanceID string) bool {
	return true
}

func (a *allocationImplementationMock) DeleteInstance(instanceID string) {
}

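// nodeOperationsMock implements the per-node operations for a single
// CiliumNode, recording the IPs it has allocated from the shared mock pool.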
type nodeOperationsMock struct {
	allocator *allocationImplementationMock

	// mutex protects allocatedIPs
	mutex        lock.RWMutex
	allocatedIPs []string
}

func (n *nodeOperationsMock) GetUsedIPWithPrefixes() int {
	return len(n.allocatedIPs)
}

func (n *nodeOperationsMock) UpdatedNode(obj *v2.CiliumNode) {}

func (n *nodeOperationsMock) PopulateStatusFields(resource *v2.CiliumNode) {}

func (n *nodeOperationsMock) CreateInterface(ctx context.Context, allocation *AllocationAction, scopedLog *logrus.Entry) (int, string, error) {
	return 0, "operation not supported", fmt.Errorf("operation not supported")
}

func (n *nodeOperationsMock) ResyncInterfacesAndIPs(ctx context.Context, scopedLog *logrus.Entry) (
	ipamTypes.AllocationMap,
	ipamStats.InterfaceStats,
	error) {
	var stats ipamStats.InterfaceStats
	available := ipamTypes.AllocationMap{}
	n.mutex.RLock()
	for _, ip := range n.allocatedIPs {
		available[ip] = ipamTypes.AllocationIP{}
	}
	n.mutex.RUnlock()
	return available, stats, nil
}

func (n *nodeOperationsMock) PrepareIPAllocation(scopedLog *logrus.Entry) (*AllocationAction, error) {
	n.allocator.mutex.RLock()
	defer n.allocator.mutex.RUnlock()
	return &AllocationAction{
		PoolID: testPoolID,
		IPv4: IPAllocationAction{
			AvailableForAllocation: n.allocator.poolSize - n.allocator.allocatedIPs,
		},
	}, nil
}

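// AllocateIPs satisfies the requested allocation by minting sequential fake
// IP strings from the shared generator and charging them against the pool.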
func (n *nodeOperationsMock) AllocateIPs(ctx context.Context, allocation *AllocationAction) error {
	n.mutex.Lock()
	n.allocator.mutex.Lock()
	n.allocator.allocatedIPs += allocation.IPv4.AvailableForAllocation
	for i := 0; i < allocation.IPv4.AvailableForAllocation; i++ {
		n.allocator.ipGenerator++
		n.allocatedIPs = append(n.allocatedIPs, fmt.Sprintf("%d", n.allocator.ipGenerator))
	}
	n.allocator.mutex.Unlock()
	n.mutex.Unlock()
	return nil
}

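// PrepareIPRelease selects up to excessIPs addresses from the end of the
// node's allocation list as candidates for release.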
func (n *nodeOperationsMock) PrepareIPRelease(excessIPs int, scopedLog *logrus.Entry) *ReleaseAction {
	n.mutex.RLock()
	excessIPs = math.IntMin(excessIPs, len(n.allocatedIPs))
	r := &ReleaseAction{PoolID: testPoolID}
	for i := 1; i <= excessIPs; i++ {
		// Release from the end of the slice to avoid releasing used IPs
		releaseIndex := len(n.allocatedIPs) - (excessIPs + i - 1)
		r.IPsToRelease = append(r.IPsToRelease, n.allocatedIPs[releaseIndex])
	}
	n.mutex.RUnlock()
	return r
}

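// releaseIP removes a single IP from the node's allocation list and returns
// it to the shared pool, or errors if the IP was never allocated.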
func (n *nodeOperationsMock) releaseIP(ip string) error {
	n.mutex.Lock()
	defer n.mutex.Unlock()
	n.allocator.mutex.Lock()
	defer n.allocator.mutex.Unlock()
	for i, allocatedIP := range n.allocatedIPs {
		if allocatedIP == ip {
			n.allocatedIPs = append(n.allocatedIPs[:i], n.allocatedIPs[i+1:]...)
			n.allocator.allocatedIPs--
			return nil
		}
	}
	return fmt.Errorf("IP %s not found", ip)
}

func (n *nodeOperationsMock) ReleaseIPs(ctx context.Context, release *ReleaseAction) error {
	for _, ipToDelete := range release.IPsToRelease {
		if err := n.releaseIP(ipToDelete); err != nil {
			return fmt.Errorf("unable to release IP %s: %w", ipToDelete, err)
		}
	}
	return nil
}

func (n *nodeOperationsMock) GetMaximumAllocatableIPv4() int {
	return 0
}

func (n *nodeOperationsMock) GetMinimumAllocatableIPv4() int {
	return defaults.IPAMPreAllocation
}

func (n *nodeOperationsMock) IsPrefixDelegated() bool {
	return false
}

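// TestGetNodeNames verifies that NodeManager.GetNames tracks nodes as they
// are upserted and deleted.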
func TestGetNodeNames(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	node1 := newCiliumNode("node1", 0, 0, 0)
	mngr.Upsert(node1)

	names := mngr.GetNames()
	require.Equal(t, 1, len(names))
	require.Equal(t, "node1", names[0])

	mngr.Upsert(newCiliumNode("node2", 0, 0, 0))

	names = mngr.GetNames()
	require.Equal(t, 2, len(names))

	mngr.Delete(node1)

	names = mngr.GetNames()
	require.Equal(t, 1, len(names))
	require.Equal(t, "node2", names[0])
}

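// TestNodeManagerGet verifies that Get returns an upserted node and nil for
// unknown or deleted nodes.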
func TestNodeManagerGet(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	node1 := newCiliumNode("node1", 0, 0, 0)
	mngr.Upsert(node1)

	require.NotNil(t, mngr.Get("node1"))
	require.Nil(t, mngr.Get("node2"))

	mngr.Delete(node1)
	require.Nil(t, mngr.Get("node1"))
	require.Nil(t, mngr.Get("node2"))
}

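// TestNodeManagerDelete verifies that deleting a node also removes its
// per-node metrics.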
func TestNodeManagerDelete(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	metrics := metricsmock.NewMockMetrics()
	mngr, err := NewNodeManager(am, k8sapi, metrics, 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	node1 := newCiliumNode("node-foo", 0, 0, 0)
	mngr.Upsert(node1)

	require.NotNil(t, mngr.Get("node-foo"))
	require.Nil(t, mngr.Get("node2"))

	mngr.Resync(context.Background(), time.Now())
	avail, used, needed := metrics.GetPerNodeMetrics("node-foo")
	require.NotNil(t, avail)
	require.NotNil(t, used)
	require.NotNil(t, needed)
	mngr.Delete(node1)
	// Following a node Delete, we expect the per-node metrics for that Node to be
	// deleted.
	avail, used, needed = metrics.GetPerNodeMetrics("node-foo")
	require.Nil(t, avail)
	require.Nil(t, used)
	require.Nil(t, needed)
	require.Nil(t, mngr.Get("node-foo"))
	require.Nil(t, mngr.Get("node2"))
}

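// k8sMock is a no-op CiliumNode client; the tests only need NodeManager's
// Kubernetes dependency to be satisfiable, not functional.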
type k8sMock struct{}

func (k *k8sMock) Update(origNode, orig *v2.CiliumNode) (*v2.CiliumNode, error) {
	return nil, nil
}

func (k *k8sMock) UpdateStatus(origNode, node *v2.CiliumNode) (*v2.CiliumNode, error) {
	return nil, nil
}

func (k *k8sMock) Get(node string) (*v2.CiliumNode, error) {
	return &v2.CiliumNode{}, nil
}

func (k *k8sMock) Create(*v2.CiliumNode) (*v2.CiliumNode, error) {
	return &v2.CiliumNode{}, nil
}

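// newCiliumNode builds a CiliumNode with the given PreAllocate and
// MinAllocate settings and marks `used` IPs as in use via updateCiliumNode.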
func newCiliumNode(node string, preAllocate, minAllocate, used int) *v2.CiliumNode {
	cn := &v2.CiliumNode{
		ObjectMeta: metav1.ObjectMeta{Name: node, Namespace: "default"},
		Spec: v2.NodeSpec{
			IPAM: ipamTypes.IPAMSpec{
				Pool:        ipamTypes.AllocationMap{},
				PreAllocate: preAllocate,
				MinAllocate: minAllocate,
			},
		},
		Status: v2.NodeStatus{
			IPAM: ipamTypes.IPAMStatus{
				Used:       ipamTypes.AllocationMap{},
				ReleaseIPs: map[string]ipamTypes.IPReleaseStatus{},
			},
		},
	}

	updateCiliumNode(cn, used)

	return cn
}

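// updateCiliumNode rebuilds the node's spec pool with `used` placeholder IPs
// (1.1.1.x) and moves them into Status.IPAM.Used, simulating a node that has
// consumed that many addresses.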
func updateCiliumNode(cn *v2.CiliumNode, used int) *v2.CiliumNode {
	cn.Spec.IPAM.Pool = ipamTypes.AllocationMap{}
	for i := 1; i <= used; i++ {
		cn.Spec.IPAM.Pool[fmt.Sprintf("1.1.1.%d", i)] = ipamTypes.AllocationIP{Resource: "foo"}
	}

	cn.Status.IPAM.Used = ipamTypes.AllocationMap{}
	for ip, ipAllocation := range cn.Spec.IPAM.Pool {
		if used > 0 {
			delete(cn.Spec.IPAM.Pool, ip)
			cn.Status.IPAM.Used[ip] = ipAllocation
			used--
		}
	}

	return cn
}

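// reachedAddressesNeeded reports whether the named node's outstanding address
// deficit has converged to the expected value.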
func reachedAddressesNeeded(mngr *NodeManager, nodeName string, needed int) (success bool) {
	if node := mngr.Get(nodeName); node != nil {
		success = node.GetNeededAddresses() == needed
	}
	return
}

// TestNodeManagerDefaultAllocation tests allocation with default parameters
//
// - MinAllocate 0
// - PreAllocate 8
func TestNodeManagerDefaultAllocation(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	// Announce node, wait for IPs to become available
	cn := newCiliumNode("node1", 8, 0, 0)
	mngr.Upsert(cn)
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node1", 0) }, 5*time.Second))

	node := mngr.Get("node1")
	require.NotNil(t, node)
	require.Equal(t, 8, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 0, node.Stats().IPv4.UsedIPs)

	// Use 7 out of 8 IPs; PreAllocate 8 should top the pool back up to
	// 15 available (7 used + 8 pre-allocated)
	mngr.Upsert(updateCiliumNode(cn, 7))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node1", 0) }, 5*time.Second))

	node = mngr.Get("node1")
	require.NotNil(t, node)
	require.Equal(t, 15, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 7, node.Stats().IPv4.UsedIPs)
}

// TestNodeManagerMinAllocate20 tests MinAllocate without PreAllocate
//
// - MinAllocate 10 (raised to 20 during the test)
// - PreAllocate -1
func TestNodeManagerMinAllocate20(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	// Announce node, wait for IPs to become available
	cn := newCiliumNode("node2", -1, 10, 0)
	mngr.Upsert(cn)
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))

	node := mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 10, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 0, node.Stats().IPv4.UsedIPs)

	// 10 available, 8 used
	mngr.Upsert(updateCiliumNode(cn, 8))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))

	node = mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 10, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 8, node.Stats().IPv4.UsedIPs)

	// Change MinAllocate to 20
	mngr.Upsert(newCiliumNode("node2", 0, 20, 8))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))

	node = mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 8, node.Stats().IPv4.UsedIPs)
	require.Equal(t, 20, node.Stats().IPv4.AvailableIPs)
}

// TestNodeManagerMinAllocateAndPreallocate tests MinAllocate in combination with PreAllocate
//
// - MinAllocate 10
// - PreAllocate 1
func TestNodeManagerMinAllocateAndPreallocate(t *testing.T) {
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	// Announce node, wait for IPs to become available
	cn := newCiliumNode("node2", 1, 10, 0)
	mngr.Upsert(cn)
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))

	node := mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 10, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 0, node.Stats().IPv4.UsedIPs)

	// Use 9 out of 10 IPs, no additional IPs should be allocated
	mngr.Upsert(updateCiliumNode(cn, 9))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))
	node = mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 10, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 9, node.Stats().IPv4.UsedIPs)

	// Use 10 out of 10 IPs, PreAllocate 1 must kick in and allocate an additional IP
	mngr.Upsert(updateCiliumNode(cn, 10))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))
	node = mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 11, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 10, node.Stats().IPv4.UsedIPs)

	// Release some IPs, no additional IPs should be allocated
	mngr.Upsert(updateCiliumNode(cn, 8))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node2", 0) }, 5*time.Second))
	node = mngr.Get("node2")
	require.NotNil(t, node)
	require.Equal(t, 11, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 8, node.Stats().IPv4.UsedIPs)
}

// TestNodeManagerReleaseAddress tests PreAllocate, MinAllocate and MaxAboveWatermark
// when release of excess IPs is enabled
//
// - MinAllocate 15
// - PreAllocate 4
// - MaxAboveWatermark 4
func TestNodeManagerReleaseAddress(t *testing.T) {
	operatorOption.Config.ExcessIPReleaseDelay = 2
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, true, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	// Announce node, wait for IPs to become available
	cn := newCiliumNode("node3", 4, 15, 0)
	cn.Spec.IPAM.MaxAboveWatermark = 4
	mngr.Upsert(cn)
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 1*time.Second))

	node := mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 19, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 0, node.Stats().IPv4.UsedIPs)

	// Use 11 out of 19 IPs, no additional IPs should be allocated
	mngr.Upsert(updateCiliumNode(cn, 11))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 19, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 11, node.Stats().IPv4.UsedIPs)

	// Use 19 out of 19 IPs, PreAllocate 4 + MaxAboveWatermark must kick in and allocate 8 additional IPs
	mngr.Upsert(updateCiliumNode(cn, 19))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 27, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 19, node.Stats().IPv4.UsedIPs)

	// Free some IPs. Excess IPs appear but are only released at the
	// interval-based resync, so expect a timeout here.
	mngr.Upsert(updateCiliumNode(cn, 10))
	require.NotNil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 2*time.Second))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 27, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 10, node.Stats().IPv4.UsedIPs)

	// Trigger resync manually, excess IPs should be released down to 18
	// (10 used + 4 prealloc + 4 max-above-watermark)
	// Excess timestamps should be registered after this trigger
	node.instanceSync.Trigger()

	// Acknowledge release IPs after 3 secs
	time.AfterFunc(3*time.Second, func() {
		// Excess delay duration should have elapsed by now, trigger resync again.
		// IPs should be marked as excess
		node.instanceSync.Trigger()
		time.Sleep(1 * time.Second)
		node.PopulateIPReleaseStatus(node.resource)
		// Fake acknowledge IPs for release like agent would.
		testipam.FakeAcknowledgeReleaseIps(node.resource)
		// Resync one more time to process acknowledgements.
		node.instanceSync.Trigger()
	})

	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 19, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 10, node.Stats().IPv4.UsedIPs)
}

// TestNodeManagerAbortRelease tests aborting the IP release handshake if a new
// allocation on the node resolves the excess
func TestNodeManagerAbortRelease(t *testing.T) {
	var wg sync.WaitGroup
	operatorOption.Config.ExcessIPReleaseDelay = 2
	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, true, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	// Announce node, wait for IPs to become available
	cn := newCiliumNode("node3", 1, 3, 0)
	mngr.Upsert(cn)
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 1*time.Second))

	node := mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 3, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 0, node.Stats().IPv4.UsedIPs)

	// Use 3 IPs, PreAllocate 1 should top the pool back up to 4 available
	mngr.Upsert(updateCiliumNode(cn, 3))
	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 4, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 3, node.Stats().IPv4.UsedIPs)

	mngr.Upsert(updateCiliumNode(node.resource, 2))
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 4, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 2, node.Stats().IPv4.UsedIPs)

	// Trigger resync manually, excess IPs should be released down to 3
	// Excess timestamps should be registered after this trigger
	node.instanceSync.Trigger()
	wg.Add(1)

	// Acknowledge release IPs after 3 secs
	time.AfterFunc(3*time.Second, func() {
		defer wg.Done()
		// Excess delay duration should have elapsed by now, trigger resync again.
		// IPs should be marked as excess
		node.instanceSync.Trigger()
		time.Sleep(1 * time.Second)
		node.PopulateIPReleaseStatus(node.resource)

		require.Equal(t, 1, len(node.resource.Status.IPAM.ReleaseIPs))

		// Fake acknowledge IPs for release like agent would.
		testipam.FakeAcknowledgeReleaseIps(node.resource)

		// Use up one more IP to make excess = 0
		mngr.Upsert(updateCiliumNode(node.resource, 3))
		node.poolMaintainer.Trigger()
		// Resync one more time to process acknowledgements.
		node.instanceSync.Trigger()

		time.Sleep(1 * time.Second)
		node.PopulateIPReleaseStatus(node.resource)

		// Verify that the entry for the previously marked IP is removed instead of being set to the released state.
		require.Equal(t, 0, len(node.resource.Status.IPAM.ReleaseIPs))
	})

	require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, "node3", 0) }, 5*time.Second))
	wg.Wait()
	node = mngr.Get("node3")
	require.NotNil(t, node)
	require.Equal(t, 4, node.Stats().IPv4.AvailableIPs)
	require.Equal(t, 3, node.Stats().IPv4.UsedIPs)
}

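// nodeState captures the per-node fixtures used by the multi-node and
// benchmark tests below.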
type nodeState struct {
	cn           *v2.CiliumNode
	name         string
	instanceName string
}

// TestNodeManagerManyNodes tests IP allocation for 100 nodes
//
// - MinAllocate 10
// - PreAllocate 1
func TestNodeManagerManyNodes(t *testing.T) {
	const (
		numNodes    = 100
		minAllocate = 10
	)

	am := newAllocationImplementationMock()
	require.NotNil(t, am)
	metricsapi := metricsmock.NewMockMetrics()
	mngr, err := NewNodeManager(am, k8sapi, metricsapi, 10, false, false)
	require.Nil(t, err)
	require.NotNil(t, mngr)

	state := make([]*nodeState, numNodes)

	for i := range state {
		s := &nodeState{name: fmt.Sprintf("node%d", i), instanceName: fmt.Sprintf("i-testNodeManagerManyNodes-%d", i)}
		s.cn = newCiliumNode(s.name, 1, minAllocate, 0)
		state[i] = s
		mngr.Upsert(s.cn)
	}

	for _, s := range state {
		require.Nil(t, testutils.WaitUntil(func() bool { return reachedAddressesNeeded(mngr, s.name, 0) }, 5*time.Second))

		node := mngr.Get(s.name)
		require.NotNil(t, node)
		if node.Stats().IPv4.AvailableIPs != minAllocate {
			t.Errorf("Node %s allocation mismatch. expected: %d allocated: %d", s.name, minAllocate, node.Stats().IPv4.AvailableIPs)
			t.Fail()
		}
		require.Equal(t, 0, node.Stats().IPv4.UsedIPs)
	}

	// The above check returns as soon as the address requirements are met.
	// The metrics may still be outdated; resync all nodes to update the
	// metrics.
	mngr.Resync(context.TODO(), time.Now())

	require.Equal(t, numNodes, metricsapi.Nodes("total"))
	require.Equal(t, 0, metricsapi.Nodes("in-deficit"))
	require.Equal(t, 0, metricsapi.Nodes("at-capacity"))

	require.Equal(t, numNodes*minAllocate, metricsapi.AllocatedIPs("available"))
	require.Equal(t, 0, metricsapi.AllocatedIPs("needed"))
	require.Equal(t, 0, metricsapi.AllocatedIPs("used"))

	require.NotEqual(t, 0, metricsapi.ResyncCount())
}

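// benchmarkAllocWorker measures how long the NodeManager takes to satisfy the
// initial allocation for b.N mock nodes. Note that the workers, delay,
// rateLimit and burst parameters are currently unused by this mock-backed
// variant.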
func benchmarkAllocWorker(b *testing.B, workers int64, delay time.Duration, rateLimit float64, burst int) {
	am := newAllocationImplementationMock()
	require.NotNil(b, am)
	mngr, err := NewNodeManager(am, k8sapi, metricsmock.NewMockMetrics(), 10, false, false)
	require.Nil(b, err)
	require.NotNil(b, mngr)

	state := make([]*nodeState, b.N)

	b.ResetTimer()
	for i := range state {
		s := &nodeState{name: fmt.Sprintf("node%d", i), instanceName: fmt.Sprintf("i-benchmarkAllocWorker-%d", i)}
		s.cn = newCiliumNode(s.name, 1, 10, 0)
		state[i] = s
		mngr.Upsert(s.cn)
	}

restart:
	for _, s := range state {
		if !reachedAddressesNeeded(mngr, s.name, 0) {
			time.Sleep(5 * time.Millisecond)
			goto restart
		}
	}
	b.StopTimer()
}

func BenchmarkAllocDelay20Worker1(b *testing.B) {
	benchmarkAllocWorker(b, 1, 20*time.Millisecond, 100.0, 4)
}
func BenchmarkAllocDelay20Worker10(b *testing.B) {
	benchmarkAllocWorker(b, 10, 20*time.Millisecond, 100.0, 4)
}
func BenchmarkAllocDelay20Worker50(b *testing.B) {
	benchmarkAllocWorker(b, 50, 20*time.Millisecond, 100.0, 4)
}
func BenchmarkAllocDelay50Worker1(b *testing.B) {
	benchmarkAllocWorker(b, 1, 50*time.Millisecond, 100.0, 4)
}
func BenchmarkAllocDelay50Worker10(b *testing.B) {
	benchmarkAllocWorker(b, 10, 50*time.Millisecond, 100.0, 4)
}
func BenchmarkAllocDelay50Worker50(b *testing.B) {
	benchmarkAllocWorker(b, 50, 50*time.Millisecond, 100.0, 4)
}