github.com/cilium/cilium@v1.16.2/pkg/ipam/crd.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package ipam

import (
	"context"
	"errors"
	"fmt"
	"net"
	"reflect"
	"strconv"
	"sync"

	"github.com/sirupsen/logrus"
	"github.com/vishvananda/netlink"
	"golang.org/x/sys/unix"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"

	alibabaCloud "github.com/cilium/cilium/pkg/alibabacloud/utils"
	"github.com/cilium/cilium/pkg/cidr"
	"github.com/cilium/cilium/pkg/ip"
	ipamOption "github.com/cilium/cilium/pkg/ipam/option"
	ipamTypes "github.com/cilium/cilium/pkg/ipam/types"
	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/pkg/k8s/client"
	"github.com/cilium/cilium/pkg/k8s/informer"
	"github.com/cilium/cilium/pkg/k8s/utils"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/node"
	nodeTypes "github.com/cilium/cilium/pkg/node/types"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/time"
	"github.com/cilium/cilium/pkg/trigger"
)

var (
	sharedNodeStore *nodeStore
	initNodeStore   sync.Once
)

const (
	fieldName = "name"
)

// nodeStore represents a CiliumNode custom resource and binds the CR to a list
// of allocators
type nodeStore struct {
	// mutex protects access to all members of this struct
	mutex lock.RWMutex

	// ownNode is the last known version of the own node resource
	ownNode *ciliumv2.CiliumNode

	// allocators is a list of allocators tied to this custom resource
	allocators []*crdAllocator

	// refreshTrigger is the configured trigger to synchronize updates to
	// the custom resource with rate limiting
	refreshTrigger *trigger.Trigger

	// allocationPoolSize is the size of the IP pool for each address
	// family
	allocationPoolSize map[Family]int

	// signal for completion of restoration
	restoreFinished  chan struct{}
	restoreCloseOnce sync.Once

	clientset client.Clientset

	conf      *option.DaemonConfig
	mtuConfig MtuConfiguration
}

// newNodeStore initializes a new store which reflects the CiliumNode custom
// resource of the specified node name
func newNodeStore(nodeName string, conf *option.DaemonConfig, owner Owner, localNodeStore *node.LocalNodeStore, clientset client.Clientset, k8sEventReg K8sEventRegister, mtuConfig MtuConfiguration) *nodeStore {
	log.WithField(fieldName, nodeName).Info("Subscribed to CiliumNode custom resource")

	store := &nodeStore{
		allocators:         []*crdAllocator{},
		allocationPoolSize: map[Family]int{},
		conf:               conf,
		mtuConfig:          mtuConfig,
		clientset:          clientset,
	}
	store.restoreFinished = make(chan struct{})

	t, err := trigger.NewTrigger(trigger.Parameters{
		Name:        "crd-allocator-node-refresher",
		MinInterval: conf.IPAMCiliumNodeUpdateRate,
		TriggerFunc: store.refreshNodeTrigger,
	})
	if err != nil {
		log.WithError(err).Fatal("Unable to initialize CiliumNode synchronization trigger")
	}
	store.refreshTrigger = t

	// Create the CiliumNode custom resource. This call will block until
	// the custom resource has been created
	owner.UpdateCiliumNodeResource()
	apiGroup := "cilium/v2::CiliumNode"
	ciliumNodeSelector := fields.ParseSelectorOrDie("metadata.name=" + nodeName)
	_, ciliumNodeInformer := informer.NewInformer(
		utils.ListerWatcherWithFields(
			utils.ListerWatcherFromTyped[*ciliumv2.CiliumNodeList](clientset.CiliumV2().CiliumNodes()),
			ciliumNodeSelector),
		&ciliumv2.CiliumNode{},
		0,
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				var valid, equal bool
				defer func() { k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "create", valid, equal) }()
				if node, ok := obj.(*ciliumv2.CiliumNode); ok {
					valid = true
					store.updateLocalNodeResource(node.DeepCopy())
					k8sEventReg.K8sEventProcessed("CiliumNode", "create", true)
				} else {
					log.Warningf("Unknown CiliumNode object type %s received: %+v", reflect.TypeOf(obj), obj)
				}
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				var valid, equal bool
				defer func() { k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "update", valid, equal) }()
				if oldNode, ok := oldObj.(*ciliumv2.CiliumNode); ok {
					if newNode, ok := newObj.(*ciliumv2.CiliumNode); ok {
						valid = true
						newNode = newNode.DeepCopy()
						if oldNode.DeepEqual(newNode) {
							// The UpdateStatus call in refreshNode requires an up-to-date
							// CiliumNode.ObjectMeta.ResourceVersion. Therefore, we store the most
							// recent version here even if the nodes are equal, because
							// CiliumNode.DeepEqual will consider two nodes to be equal even if
							// their resource version differs.
							store.setOwnNodeWithoutPoolUpdate(newNode)
							equal = true
							return
						}
						store.updateLocalNodeResource(newNode)
						k8sEventReg.K8sEventProcessed("CiliumNode", "update", true)
					} else {
						log.Warningf("Unknown CiliumNode object type %T received: %+v", newObj, newObj)
					}
				} else {
					log.Warningf("Unknown CiliumNode object type %T received: %+v", oldObj, oldObj)
				}
			},
			DeleteFunc: func(obj interface{}) {
				// Given we are watching a single specific
				// resource using the node name, any delete
				// notification means that the resource
				// matching the local node name has been
				// removed. No attempt to cast is required.
				store.deleteLocalNodeResource()
				k8sEventReg.K8sEventProcessed("CiliumNode", "delete", true)
				k8sEventReg.K8sEventReceived(apiGroup, "CiliumNode", "delete", true, false)
			},
		},
		nil,
	)

	go ciliumNodeInformer.Run(wait.NeverStop)

	log.WithField(fieldName, nodeName).Info("Waiting for CiliumNode custom resource to become available...")
	if ok := cache.WaitForCacheSync(wait.NeverStop, ciliumNodeInformer.HasSynced); !ok {
		log.WithField(fieldName, nodeName).Fatal("Unable to synchronize CiliumNode custom resource")
	} else {
		log.WithField(fieldName, nodeName).Info("Successfully synchronized CiliumNode custom resource")
	}

	for {
		minimumReached, required, numAvailable := store.hasMinimumIPsInPool(localNodeStore)
		logFields := logrus.Fields{
			fieldName:   nodeName,
			"required":  required,
			"available": numAvailable,
		}
		if minimumReached {
			log.WithFields(logFields).Info("All required IPs are available in CRD-backed allocation pool")
			break
		}

		log.WithFields(logFields).WithField(
			logfields.HelpMessage,
			"Check if cilium-operator pod is running and does not have any warnings or error messages.",
		).Info("Waiting for IPs to become available in CRD-backed allocation pool")
		time.Sleep(5 * time.Second)
	}

	go func() {
		// Initial upstream sync must wait for the allocated IPs
		// to be restored
		<-store.restoreFinished
		store.refreshTrigger.TriggerWithReason("initial sync")
	}()

	return store
}

func deriveVpcCIDRs(node *ciliumv2.CiliumNode) (primaryCIDR *cidr.CIDR, secondaryCIDRs []*cidr.CIDR) {
	// A node belongs to a single VPC so we can pick the first ENI
	// in the list and derive the VPC CIDR from it.
	for _, eni := range node.Status.ENI.ENIs {
		c, err := cidr.ParseCIDR(eni.VPC.PrimaryCIDR)
		if err == nil {
			primaryCIDR = c
			for _, sc := range eni.VPC.CIDRs {
				c, err = cidr.ParseCIDR(sc)
				if err == nil {
					secondaryCIDRs = append(secondaryCIDRs, c)
				}
			}
			return
		}
	}
	for _, azif := range node.Status.Azure.Interfaces {
		c, err := cidr.ParseCIDR(azif.CIDR)
		if err == nil {
			primaryCIDR = c
			return
		}
	}
	// return AlibabaCloud VPC CIDR
	if len(node.Status.AlibabaCloud.ENIs) > 0 {
		c, err := cidr.ParseCIDR(node.Spec.AlibabaCloud.CIDRBlock)
		if err == nil {
			primaryCIDR = c
		}
		for _, eni := range node.Status.AlibabaCloud.ENIs {
			for _, sc := range eni.VPC.SecondaryCIDRs {
				c, err = cidr.ParseCIDR(sc)
				if err == nil {
					secondaryCIDRs = append(secondaryCIDRs, c)
				}
			}
			return
		}
	}
	return
}

func (n *nodeStore) autoDetectIPv4NativeRoutingCIDR(localNodeStore *node.LocalNodeStore) bool {
	if primaryCIDR, secondaryCIDRs := deriveVpcCIDRs(n.ownNode); primaryCIDR != nil {
		allCIDRs := append([]*cidr.CIDR{primaryCIDR}, secondaryCIDRs...)
		if nativeCIDR := n.conf.GetIPv4NativeRoutingCIDR(); nativeCIDR != nil {
			found := false
			for _, vpcCIDR := range allCIDRs {
				logFields := logrus.Fields{
					"vpc-cidr":                   vpcCIDR.String(),
					option.IPv4NativeRoutingCIDR: nativeCIDR.String(),
				}

				ranges4, _ := ip.CoalesceCIDRs([]*net.IPNet{nativeCIDR.IPNet, vpcCIDR.IPNet})
				if len(ranges4) != 1 {
					log.WithFields(logFields).Info("Native routing CIDR does not contain VPC CIDR, trying next")
				} else {
					found = true
					log.WithFields(logFields).Info("Native routing CIDR contains VPC CIDR, ignoring autodetected VPC CIDRs.")
					break
				}
			}
			if !found {
				log.Fatal("None of the VPC CIDRs contains the specified native routing CIDR")
			}
		} else {
			log.WithFields(logrus.Fields{
				"vpc-cidr": primaryCIDR.String(),
			}).Info("Using autodetected primary VPC CIDR.")
			localNodeStore.Update(func(n *node.LocalNode) {
				n.IPv4NativeRoutingCIDR = primaryCIDR
			})
		}
		return true
	} else {
		log.Info("Could not determine VPC CIDRs")
		return false
	}
}

// hasMinimumIPsInPool returns true if the required number of IPs is available
// in the allocation pool. It also returns the number of IPs required and
// available.
func (n *nodeStore) hasMinimumIPsInPool(localNodeStore *node.LocalNodeStore) (minimumReached bool, required, numAvailable int) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return
	}

	switch {
	case n.ownNode.Spec.IPAM.MinAllocate != 0:
		required = n.ownNode.Spec.IPAM.MinAllocate
	case n.ownNode.Spec.IPAM.PreAllocate != 0:
		required = n.ownNode.Spec.IPAM.PreAllocate
	case n.conf.HealthCheckingEnabled():
		required = 2
	default:
		required = 1
	}

	if n.ownNode.Spec.IPAM.Pool != nil {
		for ip := range n.ownNode.Spec.IPAM.Pool {
			if !n.isIPInReleaseHandshake(ip) {
				numAvailable++
			}
		}
		if len(n.ownNode.Spec.IPAM.Pool) >= required {
			minimumReached = true
		}

		if n.conf.IPAMMode() == ipamOption.IPAMENI || n.conf.IPAMMode() == ipamOption.IPAMAzure || n.conf.IPAMMode() == ipamOption.IPAMAlibabaCloud {
			if !n.autoDetectIPv4NativeRoutingCIDR(localNodeStore) {
				minimumReached = false
			}
		}
	}

	return
}

// deleteLocalNodeResource is called when the CiliumNode resource representing
// the local node has been deleted.
func (n *nodeStore) deleteLocalNodeResource() {
	n.mutex.Lock()
	n.ownNode = nil
	n.mutex.Unlock()
}
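
// The excess-IP release handshake handled by updateLocalNodeResource below
// works roughly as follows (summarized from the state transitions in this
// file; the operator-side steps are inferred, not implemented here):
//
//	operator: sets status.ipam.release-ips[ip] = IPAMMarkForRelease
//	agent:    answers IPAMReadyForRelease if the IP is unused locally, or
//	          IPAMDoNotRelease if an allocator still holds it or this node
//	          does not own the IP
//	operator: releases the IP, removes it from spec.ipam.pool and sets
//	          IPAMReleased
//	agent:    deletes the release-ips entry (and any unreachable route) once
//	          the IP is gone from spec.ipam.pool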

// updateLocalNodeResource is called when the CiliumNode resource representing
// the local node has been added or updated. It updates the available IPs based
// on the custom resource passed into the function.
func (n *nodeStore) updateLocalNodeResource(node *ciliumv2.CiliumNode) {
	n.mutex.Lock()
	defer n.mutex.Unlock()

	if n.conf.IPAMMode() == ipamOption.IPAMENI {
		if err := configureENIDevices(n.ownNode, node, n.mtuConfig); err != nil {
			log.WithError(err).Errorf("Failed to update routes and rules for ENIs")
		}
	}

	n.ownNode = node
	n.allocationPoolSize[IPv4] = 0
	n.allocationPoolSize[IPv6] = 0
	for ipString := range node.Spec.IPAM.Pool {
		if ip := net.ParseIP(ipString); ip != nil {
			if ip.To4() != nil {
				n.allocationPoolSize[IPv4]++
			} else {
				n.allocationPoolSize[IPv6]++
			}
		}
	}

	releaseUpstreamSyncNeeded := false
	// ACK or NACK IPs marked for release by the operator
	for ip, status := range n.ownNode.Status.IPAM.ReleaseIPs {
		if n.ownNode.Spec.IPAM.Pool == nil {
			continue
		}
		// Ignore states that agent previously responded to.
		if status == ipamOption.IPAMReadyForRelease || status == ipamOption.IPAMDoNotRelease {
			continue
		}
		if _, ok := n.ownNode.Spec.IPAM.Pool[ip]; !ok {
			if status == ipamOption.IPAMReleased {
				// Remove entry from release-ips only when it is removed from .spec.ipam.pool as well
				delete(n.ownNode.Status.IPAM.ReleaseIPs, ip)
				releaseUpstreamSyncNeeded = true

				// Remove the unreachable route for this IP
				if n.conf.UnreachableRoutesEnabled() {
					parsedIP := net.ParseIP(ip)
					if parsedIP == nil {
						// Unable to parse IP, no point in trying to remove the route
						log.Warningf("Unable to parse IP %s", ip)
						continue
					}

					err := netlink.RouteDel(&netlink.Route{
						Dst:   &net.IPNet{IP: parsedIP, Mask: net.CIDRMask(32, 32)},
						Table: unix.RT_TABLE_MAIN,
						Type:  unix.RTN_UNREACHABLE,
					})
					if err != nil && !errors.Is(err, unix.ESRCH) {
						// We ignore ESRCH, as it means the entry was already deleted
						log.WithError(err).Warningf("Unable to delete unreachable route for IP %s", ip)
						continue
					}
				}
			} else if status == ipamOption.IPAMMarkForRelease {
				// NACK the IP, if this node doesn't own the IP
				n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMDoNotRelease
				releaseUpstreamSyncNeeded = true
			}
			continue
		}

		// Ignore all other states, transition to do-not-release and ready-for-release are allowed only from
		// marked-for-release
		if status != ipamOption.IPAMMarkForRelease {
			continue
		}
		// Retrieve the appropriate allocator
		var allocator *crdAllocator
		var ipFamily Family
		if ipAddr := net.ParseIP(ip); ipAddr != nil {
			ipFamily = DeriveFamily(ipAddr)
		}
		if ipFamily == "" {
			continue
		}
		for _, a := range n.allocators {
			if a.family == ipFamily {
				allocator = a
			}
		}
		if allocator == nil {
			continue
		}

		// Some functions like crdAllocator.Allocate() acquire lock on allocator first and then on nodeStore.
		// So release nodeStore lock before acquiring allocator lock to avoid potential deadlocks from inconsistent
		// lock ordering.
		n.mutex.Unlock()
		allocator.mutex.Lock()
		_, ok := allocator.allocated[ip]
		allocator.mutex.Unlock()
		n.mutex.Lock()

		if ok {
			// IP still in use, update the operator to stop releasing the IP.
			n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMDoNotRelease
		} else {
			n.ownNode.Status.IPAM.ReleaseIPs[ip] = ipamOption.IPAMReadyForRelease
		}
		releaseUpstreamSyncNeeded = true
	}

	if releaseUpstreamSyncNeeded {
		n.refreshTrigger.TriggerWithReason("excess IP release")
	}
}

// setOwnNodeWithoutPoolUpdate overwrites the local node copy (e.g. to update
// its resourceVersion) without updating the available IP pool.
func (n *nodeStore) setOwnNodeWithoutPoolUpdate(node *ciliumv2.CiliumNode) {
	n.mutex.Lock()
	n.ownNode = node
	n.mutex.Unlock()
}

// refreshNodeTrigger is called to refresh the custom resource after taking the
// configured rate limiting into account
//
// Note: The function signature includes the reasons argument in order to
// implement the trigger.TriggerFunc interface despite the argument being
// unused.
func (n *nodeStore) refreshNodeTrigger(reasons []string) {
	if err := n.refreshNode(); err != nil {
		log.WithError(err).Warning("Unable to update CiliumNode custom resource")
		n.refreshTrigger.TriggerWithReason("retry after error")
	}
}

// refreshNode updates the custom resource in the apiserver based on the latest
// information in the local node store
func (n *nodeStore) refreshNode() error {
	n.mutex.RLock()
	if n.ownNode == nil {
		n.mutex.RUnlock()
		return nil
	}

	node := n.ownNode.DeepCopy()
	staleCopyOfAllocators := make([]*crdAllocator, len(n.allocators))
	copy(staleCopyOfAllocators, n.allocators)
	n.mutex.RUnlock()

	node.Status.IPAM.Used = ipamTypes.AllocationMap{}

	for _, a := range staleCopyOfAllocators {
		a.mutex.RLock()
		for ip, ipInfo := range a.allocated {
			node.Status.IPAM.Used[ip] = ipInfo
		}
		a.mutex.RUnlock()
	}

	var err error
	_, err = n.clientset.CiliumV2().CiliumNodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{})

	return err
}

// addAllocator adds a new CRD allocator to the node store
func (n *nodeStore) addAllocator(allocator *crdAllocator) {
	n.mutex.Lock()
	n.allocators = append(n.allocators, allocator)
	n.mutex.Unlock()
}

// allocate checks if a particular IP can be allocated or returns an error
func (n *nodeStore) allocate(ip net.IP) (*ipamTypes.AllocationIP, error) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return nil, fmt.Errorf("CiliumNode for own node is not available")
	}

	if n.ownNode.Spec.IPAM.Pool == nil {
		return nil, fmt.Errorf("No IPs available")
	}

	if n.isIPInReleaseHandshake(ip.String()) {
		return nil, fmt.Errorf("IP not available, marked or ready for release")
	}

	ipInfo, ok := n.ownNode.Spec.IPAM.Pool[ip.String()]
	if !ok {
		return nil, NewIPNotAvailableInPoolError(ip)
	}

	return &ipInfo, nil
}

// isIPInReleaseHandshake validates if a given IP is currently in the process of being released
func (n *nodeStore) isIPInReleaseHandshake(ip string) bool {
	if n.ownNode.Status.IPAM.ReleaseIPs == nil {
		return false
	}
	if status, ok := n.ownNode.Status.IPAM.ReleaseIPs[ip]; ok {
		if status == ipamOption.IPAMMarkForRelease || status == ipamOption.IPAMReadyForRelease || status == ipamOption.IPAMReleased {
			return true
		}
	}
	return false
}

// allocateNext allocates the next available IP or returns an error
func (n *nodeStore) allocateNext(allocated ipamTypes.AllocationMap, family Family, owner string) (net.IP, *ipamTypes.AllocationIP, error) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return nil, nil, fmt.Errorf("CiliumNode for own node is not available")
	}

	// Check if IP has a custom owner (only supported in manual CRD mode)
	if n.conf.IPAMMode() == ipamOption.IPAMCRD && len(owner) != 0 {
		for ip, ipInfo := range n.ownNode.Spec.IPAM.Pool {
			if ipInfo.Owner == owner {
				parsedIP := net.ParseIP(ip)
				if parsedIP == nil {
					log.WithFields(logrus.Fields{
						fieldName: n.ownNode.Name,
						"ip":      ip,
					}).Warning("Unable to parse IP in CiliumNode custom resource")
					return nil, nil, fmt.Errorf("invalid custom ip %s for %s", ip, owner)
				}
				if DeriveFamily(parsedIP) != family {
					continue
				}
				return parsedIP, &ipInfo, nil
			}
		}
	}

	// FIXME: This is currently using a brute-force method that can be
	// optimized
	for ip, ipInfo := range n.ownNode.Spec.IPAM.Pool {
		if _, ok := allocated[ip]; !ok {

			if n.isIPInReleaseHandshake(ip) {
				continue // IP not available
			}
			if ipInfo.Owner != "" {
				continue // IP is used by another
			}
			parsedIP := net.ParseIP(ip)
			if parsedIP == nil {
				log.WithFields(logrus.Fields{
					fieldName: n.ownNode.Name,
					"ip":      ip,
				}).Warning("Unable to parse IP in CiliumNode custom resource")
				continue
			}

			if DeriveFamily(parsedIP) != family {
				continue
			}

			return parsedIP, &ipInfo, nil
		}
	}

	return nil, nil, fmt.Errorf("No more IPs available")
}

// totalPoolSize returns the total size of the allocation pool
func (n *nodeStore) totalPoolSize(family Family) int {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if num, ok := n.allocationPoolSize[family]; ok {
		return num
	}
	return 0
}

// crdAllocator implements the CRD-backed IP allocator
type crdAllocator struct {
	// store is the node store backing the custom resource
	store *nodeStore

	// mutex protects access to the allocated map
	mutex lock.RWMutex

	// allocated is a map of all allocated IPs indexed by the allocated IP
	// represented as string
	allocated ipamTypes.AllocationMap

	// family is the address family this allocator allocates for
	family Family

	conf *option.DaemonConfig
}

// newCRDAllocator creates a new CRD-backed IP allocator
func newCRDAllocator(family Family, c *option.DaemonConfig, owner Owner, localNodeStore *node.LocalNodeStore, clientset client.Clientset, k8sEventReg K8sEventRegister, mtuConfig MtuConfiguration) Allocator {
	initNodeStore.Do(func() {
		sharedNodeStore = newNodeStore(nodeTypes.GetName(), c, owner, localNodeStore, clientset, k8sEventReg, mtuConfig)
	})

	allocator := &crdAllocator{
		allocated: ipamTypes.AllocationMap{},
		family:    family,
		store:     sharedNodeStore,
		conf:      c,
	}

	sharedNodeStore.addAllocator(allocator)

	return allocator
}
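
// Illustrative usage sketch (not taken from the Cilium tree; the variable
// names c, owner, lns, cs, reg and mtu are assumptions): the agent creates one
// allocator per enabled address family, and all of them share the single
// nodeStore created above, guarded by initNodeStore.
//
//	ipv4Alloc := newCRDAllocator(IPv4, c, owner, lns, cs, reg, mtu)
//	res, err := ipv4Alloc.AllocateNext("default/my-pod", PoolDefault())
//	if err == nil {
//		fmt.Printf("allocated %s via interface %s\n", res.IP, res.InterfaceNumber)
//	}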

// deriveGatewayIP accepts the CIDR and the index of the IP in this CIDR.
func deriveGatewayIP(cidr string, index int) string {
	_, ipNet, err := net.ParseCIDR(cidr)
	if err != nil {
		log.WithError(err).Warningf("Unable to parse subnet CIDR %s", cidr)
		return ""
	}
	gw := ip.GetIPAtIndex(*ipNet, int64(index))
	if gw == nil {
		return ""
	}
	return gw.String()
}

func (a *crdAllocator) buildAllocationResult(ip net.IP, ipInfo *ipamTypes.AllocationIP) (result *AllocationResult, err error) {
	result = &AllocationResult{IP: ip}

	a.store.mutex.RLock()
	defer a.store.mutex.RUnlock()

	if a.store.ownNode == nil {
		return
	}

	switch a.conf.IPAMMode() {

	// In ENI mode, the Resource points to the ENI so we can derive the
	// master interface and all CIDRs of the VPC
	case ipamOption.IPAMENI:
		for _, eni := range a.store.ownNode.Status.ENI.ENIs {
			if eni.ID == ipInfo.Resource {
				result.PrimaryMAC = eni.MAC
				result.CIDRs = []string{eni.VPC.PrimaryCIDR}
				result.CIDRs = append(result.CIDRs, eni.VPC.CIDRs...)
				// Add manually configured Native Routing CIDR
				if a.conf.GetIPv4NativeRoutingCIDR() != nil {
					result.CIDRs = append(result.CIDRs, a.conf.GetIPv4NativeRoutingCIDR().String())
				}
				if eni.Subnet.CIDR != "" {
					// The gateway for a subnet and VPC is always x.x.x.1
					// Ref: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Route_Tables.html
					result.GatewayIP = deriveGatewayIP(eni.Subnet.CIDR, 1)
				}
				result.InterfaceNumber = strconv.Itoa(eni.Number)

				return
			}
		}
		return nil, fmt.Errorf("unable to find ENI %s", ipInfo.Resource)

	// In Azure mode, the Resource points to the azure interface so we can
	// derive the master interface
	case ipamOption.IPAMAzure:
		for _, iface := range a.store.ownNode.Status.Azure.Interfaces {
			if iface.ID == ipInfo.Resource {
				result.PrimaryMAC = iface.MAC
				result.GatewayIP = iface.Gateway
				result.CIDRs = append(result.CIDRs, iface.CIDR)
				// For now, we can hardcode the interface number to a valid
				// integer because it will not be used in the allocation result
				// anyway. To elaborate, Azure IPAM mode automatically sets
				// option.Config.EgressMultiHomeIPRuleCompat to true, meaning
				// that the CNI will not use the interface number when creating
				// the pod rules and routes. We are hardcoding simply to bypass
				// the parsing errors when InterfaceNumber is empty. See
				// https://github.com/cilium/cilium/issues/15496.
				//
				// TODO: Once https://github.com/cilium/cilium/issues/14705 is
				// resolved, then we don't need to hardcode this anymore.
				result.InterfaceNumber = "0"
				return
			}
		}
		return nil, fmt.Errorf("unable to find ENI %s", ipInfo.Resource)

	// In AlibabaCloud mode, the Resource points to the ENI so we can derive the
	// master interface and all CIDRs of the VPC
	case ipamOption.IPAMAlibabaCloud:
		for _, eni := range a.store.ownNode.Status.AlibabaCloud.ENIs {
			if eni.NetworkInterfaceID != ipInfo.Resource {
				continue
			}
			result.PrimaryMAC = eni.MACAddress
			result.CIDRs = []string{eni.VSwitch.CIDRBlock}

			// Ref: https://www.alibabacloud.com/help/doc-detail/65398.html
			result.GatewayIP = deriveGatewayIP(eni.VSwitch.CIDRBlock, -3)
			result.InterfaceNumber = strconv.Itoa(alibabaCloud.GetENIIndexFromTags(eni.Tags))
			return
		}
		return nil, fmt.Errorf("unable to find ENI %s", ipInfo.Resource)
	}

	return
}

// Allocate will attempt to find the specified IP in the custom resource and
// allocate it if it is available. If the IP is unavailable or already
// allocated, an error is returned. The custom resource will be updated to
// reflect the newly allocated IP.
func (a *crdAllocator) Allocate(ip net.IP, owner string, pool Pool) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	if _, ok := a.allocated[ip.String()]; ok {
		return nil, fmt.Errorf("IP already in use")
	}

	ipInfo, err := a.store.allocate(ip)
	if err != nil {
		return nil, err
	}

	result, err := a.buildAllocationResult(ip, ipInfo)
	if err != nil {
		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
	}

	a.markAllocated(ip, owner, *ipInfo)
	// Update custom resource to reflect the newly allocated IP.
	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("allocation of IP %s", ip.String()))

	return result, nil
}

// AllocateWithoutSyncUpstream will attempt to find the specified IP in the
// custom resource and allocate it if it is available. If the IP is
// unavailable or already allocated, an error is returned. The custom resource
// will not be updated.
func (a *crdAllocator) AllocateWithoutSyncUpstream(ip net.IP, owner string, pool Pool) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	if _, ok := a.allocated[ip.String()]; ok {
		return nil, fmt.Errorf("IP already in use")
	}

	ipInfo, err := a.store.allocate(ip)
	if err != nil {
		return nil, err
	}

	result, err := a.buildAllocationResult(ip, ipInfo)
	if err != nil {
		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
	}

	a.markAllocated(ip, owner, *ipInfo)

	return result, nil
}

// Release will release the specified IP or return an error if the IP has not
// been allocated before. The custom resource will be updated to reflect the
// released IP.
func (a *crdAllocator) Release(ip net.IP, pool Pool) error {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	if _, ok := a.allocated[ip.String()]; !ok {
		return fmt.Errorf("IP %s is not allocated", ip.String())
	}

	delete(a.allocated, ip.String())
	// Update custom resource to reflect the newly released IP.
	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("release of IP %s", ip.String()))

	return nil
}

// markAllocated marks a particular IP as allocated
func (a *crdAllocator) markAllocated(ip net.IP, owner string, ipInfo ipamTypes.AllocationIP) {
	ipInfo.Owner = owner
	a.allocated[ip.String()] = ipInfo
}

// AllocateNext allocates the next available IP as offered by the custom
// resource or returns an error if no IP is available. The custom resource will
// be updated to reflect the newly allocated IP.
func (a *crdAllocator) AllocateNext(owner string, pool Pool) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	ip, ipInfo, err := a.store.allocateNext(a.allocated, a.family, owner)
	if err != nil {
		return nil, err
	}

	result, err := a.buildAllocationResult(ip, ipInfo)
	if err != nil {
		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
	}

	a.markAllocated(ip, owner, *ipInfo)
	// Update custom resource to reflect the newly allocated IP.
	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("allocation of IP %s", ip.String()))

	return result, nil
}

// AllocateNextWithoutSyncUpstream allocates the next available IP as offered
// by the custom resource or returns an error if no IP is available. The custom
// resource will not be updated.
func (a *crdAllocator) AllocateNextWithoutSyncUpstream(owner string, pool Pool) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	ip, ipInfo, err := a.store.allocateNext(a.allocated, a.family, owner)
	if err != nil {
		return nil, err
	}

	result, err := a.buildAllocationResult(ip, ipInfo)
	if err != nil {
		return nil, fmt.Errorf("failed to associate IP %s inside CiliumNode: %w", ip, err)
	}

	a.markAllocated(ip, owner, *ipInfo)

	return result, nil
}

// Dump provides a status report and lists all allocated IP addresses
func (a *crdAllocator) Dump() (map[Pool]map[string]string, string) {
	a.mutex.RLock()
	defer a.mutex.RUnlock()

	allocs := make(map[string]string, len(a.allocated))
	for ip := range a.allocated {
		allocs[ip] = ""
	}

	status := fmt.Sprintf("%d/%d allocated", len(allocs), a.store.totalPoolSize(a.family))
	return map[Pool]map[string]string{PoolDefault(): allocs}, status
}

func (a *crdAllocator) Capacity() uint64 {
	a.mutex.RLock()
	defer a.mutex.RUnlock()
	return uint64(a.store.totalPoolSize(a.family))
}

// RestoreFinished marks the status of restoration as done
func (a *crdAllocator) RestoreFinished() {
	a.store.restoreCloseOnce.Do(func() {
		close(a.store.restoreFinished)
	})
}

// NewIPNotAvailableInPoolError returns an error representing the given IP not
// being available in the IPAM pool.
func NewIPNotAvailableInPoolError(ip net.IP) error {
	return &ErrIPNotAvailableInPool{ip: ip}
}

// ErrIPNotAvailableInPool represents an error when an IP is not available in
// the pool.
type ErrIPNotAvailableInPool struct {
	ip net.IP
}

func (e *ErrIPNotAvailableInPool) Error() string {
	return fmt.Sprintf("IP %s is not available", e.ip.String())
}

// Is provides this error type with the logic for use with errors.Is.
func (e *ErrIPNotAvailableInPool) Is(target error) bool {
	if e == nil || target == nil {
		return false
	}
	t, ok := target.(*ErrIPNotAvailableInPool)
	if !ok {
		return ok
	}
	if t == nil {
		return false
	}
	return t.ip.Equal(e.ip)
}
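
// Illustrative sketch (assumed caller-side code, not part of this file): the
// Is method above lets callers detect a pool miss for a specific IP with
// errors.Is, since nodeStore.allocate returns NewIPNotAvailableInPoolError(ip)
// when the IP is absent from spec.ipam.pool.
//
//	if _, err := alloc.Allocate(ip, owner, PoolDefault()); errors.Is(err, NewIPNotAvailableInPoolError(ip)) {
//		// the requested IP is not in this node's pool; fall back to AllocateNext
//	}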