github.com/datadog/cilium@v1.6.12/pkg/ipam/crd.go

// Copyright 2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ipam

import (
	"fmt"
	"net"
	"reflect"
	"sync"
	"time"

	"github.com/cilium/cilium/pkg/cidr"
	"github.com/cilium/cilium/pkg/k8s"
	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/pkg/k8s/informer"
	k8sversion "github.com/cilium/cilium/pkg/k8s/version"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/trigger"

	"github.com/sirupsen/logrus"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/tools/cache"
)

var (
	sharedNodeStore *nodeStore
	initNodeStore   sync.Once
)

const (
	// customResourceUpdateRate is the maximum rate at which a custom
	// resource is updated
	customResourceUpdateRate = 15 * time.Second

	fieldName = "name"
)

// nodeStore represents a CiliumNode custom resource and binds the CR to a list
// of allocators
type nodeStore struct {
	// mutex protects access to all members of this struct
	mutex lock.RWMutex

	// ownNode is the last known version of the own node resource
	ownNode *ciliumv2.CiliumNode

	// allocators is a list of allocators tied to this custom resource
	allocators []*crdAllocator

	// refreshTrigger is the configured trigger to synchronize updates to
	// the custom resource with rate limiting
	refreshTrigger *trigger.Trigger

	// allocationPoolSize is the size of the IP pool for each address
	// family
	allocationPoolSize map[Family]int
}

// newNodeStore initializes a new store which reflects the CiliumNode custom
// resource of the specified node name
func newNodeStore(nodeName string, owner Owner) *nodeStore {
	log.WithField(fieldName, nodeName).Info("Subscribed to CiliumNode custom resource")

	store := &nodeStore{
		allocators:         []*crdAllocator{},
		allocationPoolSize: map[Family]int{},
	}
	ciliumClient := k8s.CiliumClient()

	t, err := trigger.NewTrigger(trigger.Parameters{
		Name:        "crd-allocator-node-refresher",
		MinInterval: customResourceUpdateRate,
		TriggerFunc: store.refreshNodeTrigger,
	})
	if err != nil {
		log.WithError(err).Fatal("Unable to initialize CiliumNode synchronization trigger")
	}
	store.refreshTrigger = t

	// Create the CiliumNode custom resource. This call will block until
	// the custom resource has been created
	owner.UpdateCiliumNodeResource()

	ciliumNodeSelector := fields.ParseSelectorOrDie("metadata.name=" + nodeName)
	ciliumNodeStore := cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc)
	ciliumNodeInformer := informer.NewInformerWithStore(
		cache.NewListWatchFromClient(ciliumClient.CiliumV2().RESTClient(),
			"ciliumnodes", v1.NamespaceAll, ciliumNodeSelector),
		&ciliumv2.CiliumNode{},
		0,
		cache.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				var valid, equal bool
				defer func() { owner.K8sEventReceived("CiliumNode", "create", valid, equal) }()
				if node, ok := obj.(*ciliumv2.CiliumNode); ok {
					valid = true
					store.updateLocalNodeResource(node.DeepCopy())
					owner.K8sEventProcessed("CiliumNode", "create", true)
				} else {
					log.Warningf("Unknown CiliumNode object type %s received: %+v", reflect.TypeOf(obj), obj)
				}
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				var valid, equal bool
				defer func() { owner.K8sEventReceived("CiliumNode", "update", valid, equal) }()
				if node, ok := newObj.(*ciliumv2.CiliumNode); ok {
					valid = true
					store.updateLocalNodeResource(node.DeepCopy())
					owner.K8sEventProcessed("CiliumNode", "update", true)
				} else {
					log.Warningf("Unknown CiliumNode object type %s received: %+v", reflect.TypeOf(newObj), newObj)
				}
			},
			DeleteFunc: func(obj interface{}) {
				// Given we are watching a single specific
				// resource using the node name, any delete
				// notification means that the resource
				// matching the local node name has been
				// removed. No attempt to cast is required.
				store.deleteLocalNodeResource()
				owner.K8sEventProcessed("CiliumNode", "delete", true)
				owner.K8sEventReceived("CiliumNode", "delete", true, false)
			},
		},
		func(obj interface{}) interface{} {
			cnp, _ := obj.(*ciliumv2.CiliumNode)
			return cnp
		},
		ciliumNodeStore,
	)

	go ciliumNodeInformer.Run(wait.NeverStop)

	log.WithField(fieldName, nodeName).Info("Waiting for CiliumNode custom resource to become available...")
	if ok := cache.WaitForCacheSync(wait.NeverStop, ciliumNodeInformer.HasSynced); !ok {
		log.WithField(fieldName, nodeName).Fatal("Unable to synchronize CiliumNode custom resource")
	} else {
		log.WithField(fieldName, nodeName).Info("Successfully synchronized CiliumNode custom resource")
	}

	for {
		minimumReached, required, numAvailable := store.hasMinimumIPsInPool()
		logFields := logrus.Fields{
			fieldName:   nodeName,
			"required":  required,
			"available": numAvailable,
		}
		if minimumReached {
			log.WithFields(logFields).Info("All required IPs are available in CRD-backed allocation pool")
			break
		}

		log.WithFields(logFields).Info("Waiting for IPs to become available in CRD-backed allocation pool")
		time.Sleep(5 * time.Second)
	}

	store.refreshTrigger.TriggerWithReason("initial sync")

	return store
}

func deriveVpcCIDR(node *ciliumv2.CiliumNode) (result *cidr.CIDR) {
	if len(node.Status.ENI.ENIs) > 0 {
		// A node belongs to a single VPC so we can pick the first ENI
		// in the list and derive the VPC CIDR from it.
		for _, eni := range node.Status.ENI.ENIs {
			c, err := cidr.ParseCIDR(eni.VPC.PrimaryCIDR)
			if err == nil {
				result = c
			}
			return
		}
	}
	return
}
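// Illustrative note (a sketch, not taken from this file): if every ENI on the
// node reports a hypothetical VPC primary CIDR of "10.10.0.0/16", deriveVpcCIDR
// returns that prefix, and hasMinimumIPsInPool below installs it via
// option.Config.SetIPv4NativeRoutingCIDR when running in ENI IPAM mode.
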
// hasMinimumIPsInPool returns true if the required number of IPs is available
// in the allocation pool. It also returns the number of IPs required and
// available.
func (n *nodeStore) hasMinimumIPsInPool() (minimumReached bool, required, numAvailable int) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return
	}

	switch {
	case n.ownNode.Spec.ENI.MinAllocate != 0:
		required = n.ownNode.Spec.ENI.MinAllocate
	case n.ownNode.Spec.ENI.PreAllocate != 0:
		required = n.ownNode.Spec.ENI.PreAllocate
	case option.Config.EnableHealthChecking:
		required = 2
	default:
		required = 1
	}

	if n.ownNode.Spec.IPAM.Pool != nil {
		numAvailable = len(n.ownNode.Spec.IPAM.Pool)
		if len(n.ownNode.Spec.IPAM.Pool) >= required {
			minimumReached = true
		}

		if option.Config.IPAM == option.IPAMENI {
			if vpcCIDR := deriveVpcCIDR(n.ownNode); vpcCIDR != nil {
				option.Config.SetIPv4NativeRoutingCIDR(vpcCIDR)
			} else {
				minimumReached = false
			}
		}
	}

	return
}

// deleteLocalNodeResource is called when the CiliumNode resource representing
// the local node has been deleted.
func (n *nodeStore) deleteLocalNodeResource() {
	n.mutex.Lock()
	n.ownNode = nil
	n.mutex.Unlock()
}

// updateLocalNodeResource is called when the CiliumNode resource representing
// the local node has been added or updated. It updates the available IPs based
// on the custom resource passed into the function.
func (n *nodeStore) updateLocalNodeResource(node *ciliumv2.CiliumNode) {
	n.mutex.Lock()
	defer n.mutex.Unlock()

	n.ownNode = node
	n.allocationPoolSize[IPv4] = 0
	n.allocationPoolSize[IPv6] = 0
	if node.Spec.IPAM.Pool != nil {
		for ipString := range node.Spec.IPAM.Pool {
			if ip := net.ParseIP(ipString); ip != nil {
				if ip.To4() != nil {
					n.allocationPoolSize[IPv4]++
				} else {
					n.allocationPoolSize[IPv6]++
				}
			}
		}
	}
}
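// exampleMinimumPoolCheck is an illustrative sketch rather than code from this
// package. The CiliumNode literal is hypothetical and only populates fields
// read by this file. With MinAllocate set to 2 and two addresses in
// spec.ipam.pool, hasMinimumIPsInPool reports the minimum as reached; in ENI
// IPAM mode it would additionally require a derivable VPC CIDR.
func exampleMinimumPoolCheck() {
	node := &ciliumv2.CiliumNode{}
	node.Spec.ENI.MinAllocate = 2
	node.Spec.IPAM.Pool = map[string]ciliumv2.AllocationIP{
		// Hypothetical addresses and ENI ID.
		"10.0.1.10": {Resource: "eni-example"},
		"10.0.1.11": {Resource: "eni-example"},
	}

	store := &nodeStore{allocationPoolSize: map[Family]int{}}
	store.updateLocalNodeResource(node)

	reached, required, available := store.hasMinimumIPsInPool()
	log.WithFields(logrus.Fields{
		"reached":   reached,   // true with a non-ENI IPAM configuration
		"required":  required,  // 2
		"available": available, // 2
	}).Debug("CRD allocation pool sketch")
}
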
// refreshNodeTrigger is called to refresh the custom resource after taking the
// configured rate limiting into account
//
// Note: The function signature includes the reasons argument in order to
// implement the trigger.TriggerFunc interface despite the argument being
// unused.
func (n *nodeStore) refreshNodeTrigger(reasons []string) {
	if err := n.refreshNode(); err != nil {
		log.WithError(err).Warning("Unable to update CiliumNode custom resource")
		n.refreshTrigger.TriggerWithReason("retry after error")
	}
}

// refreshNode updates the custom resource in the apiserver based on the latest
// information in the local node store
func (n *nodeStore) refreshNode() error {
	n.mutex.RLock()
	if n.ownNode == nil {
		n.mutex.RUnlock()
		return nil
	}

	node := n.ownNode.DeepCopy()
	staleCopyOfAllocators := make([]*crdAllocator, len(n.allocators))
	copy(staleCopyOfAllocators, n.allocators)
	n.mutex.RUnlock()

	node.Status.IPAM.Used = map[string]ciliumv2.AllocationIP{}

	for _, a := range staleCopyOfAllocators {
		a.mutex.RLock()
		for ip, ipInfo := range a.allocated {
			node.Status.IPAM.Used[ip] = ipInfo
		}
		a.mutex.RUnlock()
	}

	var err error
	k8sCapabilities := k8sversion.Capabilities()
	ciliumClient := k8s.CiliumClient()
	switch {
	case k8sCapabilities.UpdateStatus:
		_, err = ciliumClient.CiliumV2().CiliumNodes().UpdateStatus(node)
	default:
		_, err = ciliumClient.CiliumV2().CiliumNodes().Update(node)
	}

	return err
}

// addAllocator adds a new CRD allocator to the node store
func (n *nodeStore) addAllocator(allocator *crdAllocator) {
	n.mutex.Lock()
	n.allocators = append(n.allocators, allocator)
	n.mutex.Unlock()
}

// allocate checks if a particular IP can be allocated or returns an error
func (n *nodeStore) allocate(ip net.IP) (*ciliumv2.AllocationIP, error) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return nil, fmt.Errorf("CiliumNode for own node is not available")
	}

	if n.ownNode.Spec.IPAM.Pool == nil {
		return nil, fmt.Errorf("No IPs available")
	}

	ipInfo, ok := n.ownNode.Spec.IPAM.Pool[ip.String()]
	if !ok {
		return nil, fmt.Errorf("IP %s is not available", ip.String())
	}

	return &ipInfo, nil
}

// allocateNext allocates the next available IP or returns an error
func (n *nodeStore) allocateNext(allocated map[string]ciliumv2.AllocationIP, family Family) (net.IP, *ciliumv2.AllocationIP, error) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	if n.ownNode == nil {
		return nil, nil, fmt.Errorf("CiliumNode for own node is not available")
	}

	// FIXME: This is currently using a brute-force method that can be
	// optimized
	for ip, ipInfo := range n.ownNode.Spec.IPAM.Pool {
		if _, ok := allocated[ip]; !ok {
			parsedIP := net.ParseIP(ip)
			if parsedIP == nil {
				log.WithFields(logrus.Fields{
					fieldName: n.ownNode.Name,
					"ip":      ip,
				}).Warning("Unable to parse IP in CiliumNode custom resource")
				continue
			}

			if DeriveFamily(parsedIP) != family {
				continue
			}

			return parsedIP, &ipInfo, nil
		}
	}

	return nil, nil, fmt.Errorf("No more IPs available")
}

// crdAllocator implements the CRD-backed IP allocator
type crdAllocator struct {
	// store is the node store backing the custom resource
	store *nodeStore

	// mutex protects access to the allocated map
	mutex lock.RWMutex

	// allocated is a map of all allocated IPs indexed by the allocated IP
	// represented as string
	allocated map[string]ciliumv2.AllocationIP

	// family is the address family this allocator allocates for
	family Family
}

// newCRDAllocator creates a new CRD-backed IP allocator
func newCRDAllocator(family Family, owner Owner) Allocator {
	initNodeStore.Do(func() {
		sharedNodeStore = newNodeStore(node.GetName(), owner)
	})

	allocator := &crdAllocator{
		allocated: map[string]ciliumv2.AllocationIP{},
		family:    family,
		store:     sharedNodeStore,
	}

	sharedNodeStore.addAllocator(allocator)

	return allocator
}

func deriveGatewayIP(eni ciliumv2.ENI) string {
	subnetIP, _, err := net.ParseCIDR(eni.Subnet.CIDR)
	if err != nil {
		log.WithError(err).Warningf("Unable to parse AWS subnet CIDR %s", eni.Subnet.CIDR)
		return ""
	}

	addr := subnetIP.To4()

	// The gateway for a subnet and VPC is always x.x.x.1
	// Ref: https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Route_Tables.html
	return net.IPv4(addr[0], addr[1], addr[2], addr[3]+1).String()
}
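// Illustrative example (not taken from this file): for a hypothetical ENI
// subnet CIDR of "10.0.1.0/24", deriveGatewayIP returns "10.0.1.1", the
// address AWS reserves for the VPC router in every subnet.
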
func (a *crdAllocator) buildAllocationResult(ip net.IP, ipInfo *ciliumv2.AllocationIP) (result *AllocationResult, err error) {
	result = &AllocationResult{IP: ip}

	// In ENI mode, the Resource points to the ENI so we can derive the
	// master interface and all CIDRs of the VPC
	if option.Config.IPAM == option.IPAMENI {
		a.store.mutex.RLock()
		defer a.store.mutex.RUnlock()

		if a.store.ownNode == nil {
			return
		}

		for _, eni := range a.store.ownNode.Status.ENI.ENIs {
			if eni.ID == ipInfo.Resource {
				result.Master = eni.MAC
				result.CIDRs = []string{eni.VPC.PrimaryCIDR}
				result.CIDRs = append(result.CIDRs, eni.VPC.CIDRs...)
				if eni.Subnet.CIDR != "" {
					result.GatewayIP = deriveGatewayIP(eni)
				}

				return
			}
		}

		result = nil
		err = fmt.Errorf("unable to find ENI %s", ipInfo.Resource)
	}

	return
}

// Allocate will attempt to find the specified IP in the custom resource and
// allocate it if it is available. If the IP is unavailable or already
// allocated, an error is returned. The custom resource will be updated to
// reflect the newly allocated IP.
func (a *crdAllocator) Allocate(ip net.IP, owner string) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	if _, ok := a.allocated[ip.String()]; ok {
		return nil, fmt.Errorf("IP already in use")
	}

	ipInfo, err := a.store.allocate(ip)
	if err != nil {
		return nil, err
	}

	a.markAllocated(ip, owner, *ipInfo)

	return a.buildAllocationResult(ip, ipInfo)
}

// Release will release the specified IP or return an error if the IP has not
// been allocated before. The custom resource will be updated to reflect the
// released IP.
func (a *crdAllocator) Release(ip net.IP) error {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	if _, ok := a.allocated[ip.String()]; !ok {
		return fmt.Errorf("IP %s is not allocated", ip.String())
	}

	delete(a.allocated, ip.String())
	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("release of IP %s", ip.String()))

	return nil
}

// markAllocated marks a particular IP as allocated and triggers the custom
// resource update
func (a *crdAllocator) markAllocated(ip net.IP, owner string, ipInfo ciliumv2.AllocationIP) {
	ipInfo.Owner = owner
	a.allocated[ip.String()] = ipInfo
	a.store.refreshTrigger.TriggerWithReason(fmt.Sprintf("allocation of IP %s", ip.String()))
}

// AllocateNext allocates the next available IP as offered by the custom
// resource or returns an error if no IP is available. The custom resource will
// be updated to reflect the newly allocated IP.
func (a *crdAllocator) AllocateNext(owner string) (*AllocationResult, error) {
	a.mutex.Lock()
	defer a.mutex.Unlock()

	ip, ipInfo, err := a.store.allocateNext(a.allocated, a.family)
	if err != nil {
		return nil, err
	}

	a.markAllocated(ip, owner, *ipInfo)

	return a.buildAllocationResult(ip, ipInfo)
}

// totalPoolSize returns the total size of the allocation pool
// a.mutex must be held
func (a *crdAllocator) totalPoolSize() int {
	if num, ok := a.store.allocationPoolSize[a.family]; ok {
		return num
	}
	return 0
}

// Dump provides a status report and lists all allocated IP addresses
func (a *crdAllocator) Dump() (map[string]string, string) {
	a.mutex.RLock()
	defer a.mutex.RUnlock()

	allocs := map[string]string{}
	for ip := range a.allocated {
		allocs[ip] = ""
	}

	status := fmt.Sprintf("%d/%d allocated", len(allocs), a.totalPoolSize())
	return allocs, status
}
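// exampleCRDAllocatorUsage is an illustrative sketch rather than code from
// this package. The owner argument stands in for whatever Owner implementation
// the agent provides, the endpoint name is hypothetical, and the sketch assumes
// the package's Allocator interface exposes AllocateNext, Dump and Release as
// implemented above. It shows the call pattern against the CRD-backed
// allocator: reserve the next free IPv4 address from the CiliumNode pool,
// report the pool status, and release the address again. Note that
// newCRDAllocator blocks until the CiliumNode resource exists and its pool
// holds the minimum number of IPs.
func exampleCRDAllocatorUsage(owner Owner) error {
	alloc := newCRDAllocator(IPv4, owner)

	// Reserve the next free address from spec.ipam.pool; this also triggers
	// a rate-limited status update of the CiliumNode resource.
	result, err := alloc.AllocateNext("default/example-pod")
	if err != nil {
		return err
	}

	// Dump reports "<allocated>/<total> allocated" for this address family.
	_, status := alloc.Dump()
	log.WithField("status", status).Debug("CRD allocator state")

	// Hand the address back; the custom resource is refreshed again.
	return alloc.Release(result.IP)
}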