github.com/cilium/cilium@v1.16.2/pkg/ipam/allocator/multipool/node_handler.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package multipool
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  
    11  	k8sErrors "k8s.io/apimachinery/pkg/api/errors"
    12  
    13  	"github.com/cilium/cilium/pkg/controller"
    14  	"github.com/cilium/cilium/pkg/ipam"
    15  	"github.com/cilium/cilium/pkg/ipam/allocator"
    16  	v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    17  	"github.com/cilium/cilium/pkg/lock"
    18  	"github.com/cilium/cilium/pkg/logging/logfields"
    19  	"github.com/cilium/cilium/pkg/time"
    20  )
    21  
    22  type NodeHandler struct {
    23  	mutex lock.Mutex
    24  
    25  	poolManager *PoolAllocator
    26  	nodeUpdater ipam.CiliumNodeGetterUpdater
    27  
    28  	nodesPendingAllocation map[string]*v2.CiliumNode
    29  	restoreFinished        bool
    30  
    31  	controllerManager                *controller.Manager
    32  	controllerErrorRetryBaseDuration time.Duration // only set in unit tests
    33  }
    34  
    35  var ipamMultipoolSyncControllerGroup = controller.NewGroup("ipam-multi-pool-sync")
    36  
    37  var _ allocator.NodeEventHandler = (*NodeHandler)(nil)
    38  
    39  func NewNodeHandler(manager *PoolAllocator, nodeUpdater ipam.CiliumNodeGetterUpdater) *NodeHandler {
    40  	return &NodeHandler{
    41  		poolManager:            manager,
    42  		nodeUpdater:            nodeUpdater,
    43  		nodesPendingAllocation: map[string]*v2.CiliumNode{},
    44  		controllerManager:      controller.NewManager(),
    45  	}
    46  }
    47  
    48  func (n *NodeHandler) Upsert(resource *v2.CiliumNode) {
    49  	n.mutex.Lock()
    50  	defer n.mutex.Unlock()
    51  	n.upsertLocked(resource)
    52  }
    53  
    54  func (n *NodeHandler) Delete(resource *v2.CiliumNode) {
    55  	n.mutex.Lock()
    56  	defer n.mutex.Unlock()
    57  
    58  	err := n.poolManager.ReleaseNode(resource.Name)
    59  	if err != nil {
    60  		log.WithField(logfields.NodeName, resource.Name).
    61  			WithError(err).
    62  			Warning("Errors while release node and its CIDRs")
    63  	}
    64  
    65  	delete(n.nodesPendingAllocation, resource.Name)
    66  
    67  	// Make sure any pending update controller is stopped
    68  	n.controllerManager.RemoveController(controllerName(resource.Name))
    69  }
    70  
    71  func (n *NodeHandler) Resync(context.Context, time.Time) {
    72  	n.mutex.Lock()
    73  	defer n.mutex.Unlock()
    74  
    75  	n.poolManager.RestoreFinished()
    76  	for _, cn := range n.nodesPendingAllocation {
    77  		delete(n.nodesPendingAllocation, cn.Name)
    78  		n.createUpsertController(cn)
    79  	}
    80  	n.restoreFinished = true
    81  	n.nodesPendingAllocation = nil
    82  }
    83  
    84  func (n *NodeHandler) upsertLocked(resource *v2.CiliumNode) {
    85  	if !n.restoreFinished {
    86  		n.nodesPendingAllocation[resource.Name] = resource
    87  		_ = n.poolManager.AllocateToNode(resource)
    88  		return
    89  	}
    90  
    91  	n.createUpsertController(resource)
    92  }
    93  
    94  func (n *NodeHandler) createUpsertController(resource *v2.CiliumNode) {
    95  	// This controller serves two purposes:
    96  	// 1. It will retry allocations upon failure, e.g. if a pool does not exist yet.
    97  	// 2. Will try to synchronize the allocator's state with the CiliumNode CRD in k8s.
    98  	refetchNode := false
    99  	n.controllerManager.UpdateController(controllerName(resource.Name), controller.ControllerParams{
   100  		Group:                  ipamMultipoolSyncControllerGroup,
   101  		ErrorRetryBaseDuration: n.controllerErrorRetryBaseDuration,
   102  		DoFunc: func(ctx context.Context) error {
   103  			// errorMessage is written to the resource status
   104  			errorMessage := ""
   105  			var controllerErr error
   106  
   107  			// If a previous run of the controller failed due to a conflict,
   108  			// we need to re-fetch the node to make sure we have the latest version.
   109  			if refetchNode {
   110  				resource, controllerErr = n.nodeUpdater.Get(resource.Name)
   111  				if controllerErr != nil {
   112  					return controllerErr
   113  				}
   114  				refetchNode = false
   115  			}
   116  
   117  			err := n.poolManager.AllocateToNode(resource)
   118  			if err != nil {
   119  				log.WithField(logfields.NodeName, resource.Name).WithError(err).
   120  					Warning("Failed to allocate PodCIDRs to node")
   121  				errorMessage = err.Error()
   122  				controllerErr = err
   123  			}
   124  
   125  			newResource := resource.DeepCopy()
   126  			newResource.Status.IPAM.OperatorStatus.Error = errorMessage
   127  
   128  			newResource.Spec.IPAM.Pools.Allocated = n.poolManager.AllocatedPools(newResource.Name)
   129  
   130  			if !newResource.Spec.IPAM.Pools.DeepEqual(&resource.Spec.IPAM.Pools) {
   131  				_, err = n.nodeUpdater.Update(resource, newResource)
   132  				if err != nil {
   133  					controllerErr = errors.Join(controllerErr, fmt.Errorf("failed to update spec: %w", err))
   134  					if k8sErrors.IsConflict(err) {
   135  						refetchNode = true
   136  					}
   137  				}
   138  			}
   139  
   140  			if !newResource.Status.IPAM.OperatorStatus.DeepEqual(&resource.Status.IPAM.OperatorStatus) && !refetchNode {
   141  				_, err = n.nodeUpdater.UpdateStatus(resource, newResource)
   142  				if err != nil {
   143  					controllerErr = errors.Join(controllerErr, fmt.Errorf("failed to update status: %w", err))
   144  					if k8sErrors.IsConflict(err) {
   145  						refetchNode = true
   146  					}
   147  				}
   148  			}
   149  
   150  			return controllerErr
   151  		},
   152  	})
   153  }
   154  
   155  func controllerName(nodeName string) string {
   156  	return "ipam-multi-pool-sync-" + nodeName
   157  }