github.com/cilium/cilium@v1.16.2/pkg/ipam/allocator/multipool/node_handler.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package multipool

import (
	"context"
	"errors"
	"fmt"

	k8sErrors "k8s.io/apimachinery/pkg/api/errors"

	"github.com/cilium/cilium/pkg/controller"
	"github.com/cilium/cilium/pkg/ipam"
	"github.com/cilium/cilium/pkg/ipam/allocator"
	v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/time"
)

type NodeHandler struct {
	mutex lock.Mutex

	poolManager *PoolAllocator
	nodeUpdater ipam.CiliumNodeGetterUpdater

	nodesPendingAllocation map[string]*v2.CiliumNode
	restoreFinished        bool

	controllerManager                *controller.Manager
	controllerErrorRetryBaseDuration time.Duration // only set in unit tests
}

var ipamMultipoolSyncControllerGroup = controller.NewGroup("ipam-multi-pool-sync")

var _ allocator.NodeEventHandler = (*NodeHandler)(nil)

func NewNodeHandler(manager *PoolAllocator, nodeUpdater ipam.CiliumNodeGetterUpdater) *NodeHandler {
	return &NodeHandler{
		poolManager:            manager,
		nodeUpdater:            nodeUpdater,
		nodesPendingAllocation: map[string]*v2.CiliumNode{},
		controllerManager:      controller.NewManager(),
	}
}

func (n *NodeHandler) Upsert(resource *v2.CiliumNode) {
	n.mutex.Lock()
	defer n.mutex.Unlock()
	n.upsertLocked(resource)
}

func (n *NodeHandler) Delete(resource *v2.CiliumNode) {
	n.mutex.Lock()
	defer n.mutex.Unlock()

	err := n.poolManager.ReleaseNode(resource.Name)
	if err != nil {
		log.WithField(logfields.NodeName, resource.Name).
			WithError(err).
			Warning("Errors while release node and its CIDRs")
	}

	delete(n.nodesPendingAllocation, resource.Name)

	// Make sure any pending update controller is stopped
	n.controllerManager.RemoveController(controllerName(resource.Name))
}

func (n *NodeHandler) Resync(context.Context, time.Time) {
	n.mutex.Lock()
	defer n.mutex.Unlock()

	n.poolManager.RestoreFinished()
	for _, cn := range n.nodesPendingAllocation {
		delete(n.nodesPendingAllocation, cn.Name)
		n.createUpsertController(cn)
	}
	n.restoreFinished = true
	n.nodesPendingAllocation = nil
}

func (n *NodeHandler) upsertLocked(resource *v2.CiliumNode) {
	if !n.restoreFinished {
		n.nodesPendingAllocation[resource.Name] = resource
		_ = n.poolManager.AllocateToNode(resource)
		return
	}

	n.createUpsertController(resource)
}

func (n *NodeHandler) createUpsertController(resource *v2.CiliumNode) {
	// This controller serves two purposes:
	// 1. It will retry allocations upon failure, e.g. if a pool does not exist yet.
	// 2. Will try to synchronize the allocator's state with the CiliumNode CRD in k8s.
	refetchNode := false
	n.controllerManager.UpdateController(controllerName(resource.Name), controller.ControllerParams{
		Group:                  ipamMultipoolSyncControllerGroup,
		ErrorRetryBaseDuration: n.controllerErrorRetryBaseDuration,
		DoFunc: func(ctx context.Context) error {
			// errorMessage is written to the resource status
			errorMessage := ""
			var controllerErr error

			// If a previous run of the controller failed due to a conflict,
			// we need to re-fetch the node to make sure we have the latest version.
			if refetchNode {
				resource, controllerErr = n.nodeUpdater.Get(resource.Name)
				if controllerErr != nil {
					return controllerErr
				}
				refetchNode = false
			}

			err := n.poolManager.AllocateToNode(resource)
			if err != nil {
				log.WithField(logfields.NodeName, resource.Name).WithError(err).
					Warning("Failed to allocate PodCIDRs to node")
				errorMessage = err.Error()
				controllerErr = err
			}

			newResource := resource.DeepCopy()
			newResource.Status.IPAM.OperatorStatus.Error = errorMessage

			newResource.Spec.IPAM.Pools.Allocated = n.poolManager.AllocatedPools(newResource.Name)

			if !newResource.Spec.IPAM.Pools.DeepEqual(&resource.Spec.IPAM.Pools) {
				_, err = n.nodeUpdater.Update(resource, newResource)
				if err != nil {
					controllerErr = errors.Join(controllerErr, fmt.Errorf("failed to update spec: %w", err))
					if k8sErrors.IsConflict(err) {
						refetchNode = true
					}
				}
			}

			if !newResource.Status.IPAM.OperatorStatus.DeepEqual(&resource.Status.IPAM.OperatorStatus) && !refetchNode {
				_, err = n.nodeUpdater.UpdateStatus(resource, newResource)
				if err != nil {
					controllerErr = errors.Join(controllerErr, fmt.Errorf("failed to update status: %w", err))
					if k8sErrors.IsConflict(err) {
						refetchNode = true
					}
				}
			}

			return controllerErr
		},
	})
}

func controllerName(nodeName string) string {
	return "ipam-multi-pool-sync-" + nodeName
}
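
The refetchNode flag inside DoFunc implements a refetch-on-conflict retry: when Update or UpdateStatus is rejected with a Kubernetes conflict error, the flag is armed so that the next controller run re-fetches the CiliumNode before reapplying changes. The standalone sketch below illustrates that pattern only; it is not part of the Cilium codebase, and the store type and "node-1" name are hypothetical stand-ins. Only k8sErrors.NewConflict and k8sErrors.IsConflict are real apimachinery helpers, used here to simulate and detect the same error class the controller checks.

package main

import (
	"fmt"

	k8sErrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// store is a stand-in for an API client. It rejects updates made against a
// stale version with a Kubernetes conflict error, the same error class that
// createUpsertController checks with k8sErrors.IsConflict.
type store struct {
	version int
}

func (s *store) Get() int { return s.version }

func (s *store) Update(version int) error {
	if version != s.version {
		return k8sErrors.NewConflict(
			schema.GroupResource{Group: "cilium.io", Resource: "ciliumnodes"},
			"node-1", fmt.Errorf("stale version %d, want %d", version, s.version))
	}
	s.version++
	return nil
}

func main() {
	s := &store{version: 3}
	cached := 2      // stale local copy, as if another writer bumped the object
	refetch := false // mirrors the refetchNode flag in DoFunc

	// Emulate two controller runs: the first fails with a conflict and arms
	// refetch; the second re-fetches the latest version and succeeds.
	for attempt := 1; attempt <= 2; attempt++ {
		if refetch {
			cached = s.Get()
			refetch = false
		}
		if err := s.Update(cached); err != nil {
			if k8sErrors.IsConflict(err) {
				refetch = true
			}
			fmt.Println("attempt", attempt, "failed:", err)
			continue
		}
		fmt.Println("attempt", attempt, "succeeded")
	}
}

Re-fetching only after a reported conflict keeps the common path cheap: the cached resource is reused across controller runs until the API server signals that another writer has advanced the object, which is when a fresh Get is actually needed.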