k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/registry/core/service/ipallocator/cidrallocator.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package ipallocator 18 19 import ( 20 "fmt" 21 "net" 22 "net/netip" 23 "sync" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 networkingv1alpha1 "k8s.io/api/networking/v1alpha1" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/labels" 30 utilerrors "k8s.io/apimachinery/pkg/util/errors" 31 "k8s.io/apimachinery/pkg/util/runtime" 32 "k8s.io/apimachinery/pkg/util/sets" 33 "k8s.io/apimachinery/pkg/util/wait" 34 networkingv1alpha1informers "k8s.io/client-go/informers/networking/v1alpha1" 35 networkingv1alpha1client "k8s.io/client-go/kubernetes/typed/networking/v1alpha1" 36 networkingv1alpha1listers "k8s.io/client-go/listers/networking/v1alpha1" 37 "k8s.io/client-go/tools/cache" 38 "k8s.io/client-go/util/workqueue" 39 "k8s.io/klog/v2" 40 api "k8s.io/kubernetes/pkg/apis/core" 41 "k8s.io/kubernetes/pkg/util/iptree" 42 netutils "k8s.io/utils/net" 43 ) 44 45 // MetaAllocator maintains a Tree with the ServiceCIDRs containing an IP Allocator 46 // on the nodes. Since each allocator doesn't store the IPAddresses, because it reads 47 // them from the informer cache, it is cheap to create and delete IP Allocators. 48 // MetaAllocator forwards the request to any of the internal allocators that has free 49 // addresses. 
50 51 // MetaAllocator implements current allocator interface using 52 // ServiceCIDR and IPAddress API objects. 53 type MetaAllocator struct { 54 client networkingv1alpha1client.NetworkingV1alpha1Interface 55 serviceCIDRLister networkingv1alpha1listers.ServiceCIDRLister 56 serviceCIDRSynced cache.InformerSynced 57 ipAddressLister networkingv1alpha1listers.IPAddressLister 58 ipAddressSynced cache.InformerSynced 59 ipAddressInformer networkingv1alpha1informers.IPAddressInformer 60 queue workqueue.TypedRateLimitingInterface[string] 61 62 internalStopCh chan struct{} 63 64 muTree sync.Mutex 65 tree *iptree.Tree[*Allocator] 66 67 ipFamily api.IPFamily 68 } 69 70 var _ Interface = &MetaAllocator{} 71 72 // NewMetaAllocator returns an IP allocator that use the IPAddress 73 // and ServiceCIDR objects to track the assigned IP addresses, 74 // using an informer cache as storage. 75 func NewMetaAllocator( 76 client networkingv1alpha1client.NetworkingV1alpha1Interface, 77 serviceCIDRInformer networkingv1alpha1informers.ServiceCIDRInformer, 78 ipAddressInformer networkingv1alpha1informers.IPAddressInformer, 79 isIPv6 bool, 80 ) (*MetaAllocator, error) { 81 82 // TODO: make the NewMetaAllocator agnostic of the IP family 83 family := api.IPv4Protocol 84 if isIPv6 { 85 family = api.IPv6Protocol 86 } 87 88 c := &MetaAllocator{ 89 client: client, 90 serviceCIDRLister: serviceCIDRInformer.Lister(), 91 serviceCIDRSynced: serviceCIDRInformer.Informer().HasSynced, 92 ipAddressLister: ipAddressInformer.Lister(), 93 ipAddressSynced: ipAddressInformer.Informer().HasSynced, 94 ipAddressInformer: ipAddressInformer, 95 queue: workqueue.NewTypedRateLimitingQueueWithConfig( 96 workqueue.DefaultTypedControllerRateLimiter[string](), 97 workqueue.TypedRateLimitingQueueConfig[string]{Name: ControllerName}, 98 ), 99 internalStopCh: make(chan struct{}), 100 tree: iptree.New[*Allocator](), 101 ipFamily: family, 102 } 103 104 _, _ = 
serviceCIDRInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 105 AddFunc: c.addServiceCIDR, 106 UpdateFunc: c.updateServiceCIDR, 107 DeleteFunc: c.deleteServiceCIDR, 108 }) 109 110 go c.run() 111 112 return c, nil 113 } 114 115 func (c *MetaAllocator) addServiceCIDR(obj interface{}) { 116 key, err := cache.MetaNamespaceKeyFunc(obj) 117 if err == nil { 118 c.queue.Add(key) 119 } 120 } 121 func (c *MetaAllocator) updateServiceCIDR(old, new interface{}) { 122 key, err := cache.MetaNamespaceKeyFunc(new) 123 if err == nil { 124 c.queue.Add(key) 125 } 126 } 127 128 func (c *MetaAllocator) deleteServiceCIDR(obj interface{}) { 129 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) 130 if err == nil { 131 c.queue.Add(key) 132 } 133 } 134 135 func (c *MetaAllocator) run() { 136 defer runtime.HandleCrash() 137 defer c.queue.ShutDown() 138 klog.Info("Starting ServiceCIDR Allocator Controller") 139 defer klog.Info("Stopping ServiceCIDR Allocator Controllerr") 140 141 // Wait for all involved caches to be synced, before processing items from the queue is started 142 if !cache.WaitForCacheSync(c.internalStopCh, c.serviceCIDRSynced, c.ipAddressSynced) { 143 runtime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) 144 return 145 } 146 147 // this is single threaded only one serviceCIDR at a time 148 go wait.Until(c.runWorker, time.Second, c.internalStopCh) 149 150 <-c.internalStopCh 151 } 152 153 func (c *MetaAllocator) runWorker() { 154 for c.processNextItem() { 155 } 156 } 157 158 func (c *MetaAllocator) processNextItem() bool { 159 // Wait until there is a new item in the working queue 160 key, quit := c.queue.Get() 161 if quit { 162 return false 163 } 164 defer c.queue.Done(key) 165 166 err := c.syncTree() 167 // Handle the error if something went wrong during the execution of the business logic 168 if err != nil { 169 if c.queue.NumRequeues(key) < 5 { 170 klog.Infof("Error syncing cidr %v: %v", key, err) 171 
c.queue.AddRateLimited(key) 172 return true 173 } 174 } 175 c.queue.Forget(key) 176 return true 177 } 178 179 // syncTree syncs the ipTrees from the informer cache 180 // It deletes or creates allocator and sets the corresponding state 181 func (c *MetaAllocator) syncTree() error { 182 now := time.Now() 183 defer func() { 184 klog.V(2).Infof("Finished sync for CIDRs took %v", time.Since(now)) 185 }() 186 187 serviceCIDRs, err := c.serviceCIDRLister.List(labels.Everything()) 188 if err != nil { 189 return err 190 } 191 192 cidrsSet := sets.New[string]() 193 cidrReady := map[string]bool{} 194 for _, serviceCIDR := range serviceCIDRs { 195 ready := true 196 if !isReady(serviceCIDR) || !serviceCIDR.DeletionTimestamp.IsZero() { 197 ready = false 198 } 199 200 for _, cidr := range serviceCIDR.Spec.CIDRs { 201 if c.ipFamily == api.IPFamily(convertToV1IPFamily(netutils.IPFamilyOfCIDRString(cidr))) { 202 cidrsSet.Insert(cidr) 203 cidrReady[cidr] = ready 204 } 205 } 206 } 207 208 // obtain the existing allocators and set the existing state 209 treeSet := sets.New[string]() 210 c.muTree.Lock() 211 c.tree.DepthFirstWalk(c.ipFamily == api.IPv6Protocol, func(k netip.Prefix, v *Allocator) bool { 212 v.ready.Store(cidrReady[k.String()]) 213 treeSet.Insert(k.String()) 214 return false 215 }) 216 c.muTree.Unlock() 217 cidrsToRemove := treeSet.Difference(cidrsSet) 218 cidrsToAdd := cidrsSet.Difference(treeSet) 219 220 errs := []error{} 221 // Add new allocators 222 for _, cidr := range cidrsToAdd.UnsortedList() { 223 _, ipnet, err := netutils.ParseCIDRSloppy(cidr) 224 if err != nil { 225 return err 226 } 227 // New ServiceCIDR, create new allocator 228 allocator, err := NewIPAllocator(ipnet, c.client, c.ipAddressInformer) 229 if err != nil { 230 errs = append(errs, err) 231 continue 232 } 233 allocator.ready.Store(cidrReady[cidr]) 234 prefix, err := netip.ParsePrefix(cidr) 235 if err != nil { 236 return err 237 } 238 c.addAllocator(prefix, allocator) 239 klog.Infof("Created ClusterIP 
allocator for Service CIDR %s", cidr) 240 } 241 // Remove allocators that no longer exist 242 for _, cidr := range cidrsToRemove.UnsortedList() { 243 prefix, err := netip.ParsePrefix(cidr) 244 if err != nil { 245 return err 246 } 247 c.deleteAllocator(prefix) 248 } 249 250 return utilerrors.NewAggregate(errs) 251 } 252 253 func (c *MetaAllocator) getAllocator(ip net.IP) (*Allocator, error) { 254 c.muTree.Lock() 255 defer c.muTree.Unlock() 256 257 address := ipToAddr(ip) 258 prefix := netip.PrefixFrom(address, address.BitLen()) 259 // Use the largest subnet to allocate addresses because 260 // all the other subnets will be contained. 261 _, allocator, ok := c.tree.ShortestPrefixMatch(prefix) 262 if !ok { 263 klog.V(2).Infof("Could not get allocator for IP %s", ip.String()) 264 return nil, ErrMismatchedNetwork 265 } 266 return allocator, nil 267 } 268 269 func (c *MetaAllocator) addAllocator(cidr netip.Prefix, allocator *Allocator) { 270 c.muTree.Lock() 271 defer c.muTree.Unlock() 272 c.tree.InsertPrefix(cidr, allocator) 273 } 274 275 func (c *MetaAllocator) deleteAllocator(cidr netip.Prefix) { 276 c.muTree.Lock() 277 defer c.muTree.Unlock() 278 ok := c.tree.DeletePrefix(cidr) 279 if ok { 280 klog.V(3).Infof("CIDR %s deleted", cidr) 281 } 282 } 283 284 func (c *MetaAllocator) AllocateService(service *api.Service, ip net.IP) error { 285 allocator, err := c.getAllocator(ip) 286 if err != nil { 287 return err 288 } 289 return allocator.AllocateService(service, ip) 290 } 291 292 func (c *MetaAllocator) Allocate(ip net.IP) error { 293 allocator, err := c.getAllocator(ip) 294 if err != nil { 295 return err 296 } 297 return allocator.Allocate(ip) 298 } 299 300 func (c *MetaAllocator) AllocateNextService(service *api.Service) (net.IP, error) { 301 c.muTree.Lock() 302 defer c.muTree.Unlock() 303 304 // TODO(aojea) add strategy to return a random allocator but 305 // taking into consideration the number of addresses of each allocator. 
306 // Per example, if we have allocator A and B with 256 and 1024 possible 307 // addresses each, the chances to get B has to be 4 times the chances to 308 // get A so we can spread the load of IPs randomly. 309 // However, we need to validate the best strategy before going to Beta. 310 isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol) 311 for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) { 312 ip, err := allocator.AllocateNextService(service) 313 if err == nil { 314 return ip, nil 315 } 316 } 317 return nil, ErrFull 318 } 319 320 func (c *MetaAllocator) AllocateNext() (net.IP, error) { 321 c.muTree.Lock() 322 defer c.muTree.Unlock() 323 324 // TODO(aojea) add strategy to return a random allocator but 325 // taking into consideration the number of addresses of each allocator. 326 // Per example, if we have allocator A and B with 256 and 1024 possible 327 // addresses each, the chances to get B has to be 4 times the chances to 328 // get A so we can spread the load of IPs randomly. 329 // However, we need to validate the best strategy before going to Beta. 
330 isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol) 331 for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) { 332 ip, err := allocator.AllocateNext() 333 if err == nil { 334 return ip, nil 335 } 336 } 337 return nil, ErrFull 338 } 339 340 func (c *MetaAllocator) Release(ip net.IP) error { 341 allocator, err := c.getAllocator(ip) 342 if err != nil { 343 return err 344 } 345 return allocator.Release(ip) 346 347 } 348 func (c *MetaAllocator) ForEach(f func(ip net.IP)) { 349 ipLabelSelector := labels.Set(map[string]string{ 350 networkingv1alpha1.LabelIPAddressFamily: string(c.IPFamily()), 351 networkingv1alpha1.LabelManagedBy: ControllerName, 352 }).AsSelectorPreValidated() 353 ips, err := c.ipAddressLister.List(ipLabelSelector) 354 if err != nil { 355 return 356 } 357 for _, ip := range ips { 358 f(netutils.ParseIPSloppy(ip.Name)) 359 } 360 } 361 362 func (c *MetaAllocator) CIDR() net.IPNet { 363 return net.IPNet{} 364 365 } 366 func (c *MetaAllocator) IPFamily() api.IPFamily { 367 return c.ipFamily 368 } 369 func (c *MetaAllocator) Has(ip net.IP) bool { 370 allocator, err := c.getAllocator(ip) 371 if err != nil { 372 return false 373 } 374 return allocator.Has(ip) 375 } 376 func (c *MetaAllocator) Destroy() { 377 select { 378 case <-c.internalStopCh: 379 default: 380 close(c.internalStopCh) 381 } 382 } 383 384 // for testing 385 func (c *MetaAllocator) Used() int { 386 ipLabelSelector := labels.Set(map[string]string{ 387 networkingv1alpha1.LabelIPAddressFamily: string(c.IPFamily()), 388 networkingv1alpha1.LabelManagedBy: ControllerName, 389 }).AsSelectorPreValidated() 390 ips, err := c.ipAddressLister.List(ipLabelSelector) 391 if err != nil { 392 return 0 393 } 394 return len(ips) 395 } 396 397 // for testing 398 func (c *MetaAllocator) Free() int { 399 c.muTree.Lock() 400 defer c.muTree.Unlock() 401 402 size := 0 403 isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol) 404 for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) { 405 size += 
int(allocator.size) 406 } 407 return size - c.Used() 408 } 409 410 func (c *MetaAllocator) EnableMetrics() {} 411 412 // DryRun returns a random allocator 413 func (c *MetaAllocator) DryRun() Interface { 414 c.muTree.Lock() 415 defer c.muTree.Unlock() 416 isIPv6 := c.ipFamily == api.IPFamily(v1.IPv6Protocol) 417 for _, allocator := range c.tree.TopLevelPrefixes(isIPv6) { 418 return allocator.DryRun() 419 } 420 return &Allocator{} 421 } 422 423 func isReady(serviceCIDR *networkingv1alpha1.ServiceCIDR) bool { 424 if serviceCIDR == nil { 425 return false 426 } 427 428 for _, condition := range serviceCIDR.Status.Conditions { 429 if condition.Type == networkingv1alpha1.ServiceCIDRConditionReady { 430 return condition.Status == metav1.ConditionStatus(metav1.ConditionTrue) 431 } 432 } 433 // assume the ServiceCIDR is Ready, in order to handle scenarios where kcm is not running 434 return true 435 } 436 437 // ipToAddr converts a net.IP to a netip.Addr 438 // if the net.IP is not valid it returns an empty netip.Addr{} 439 func ipToAddr(ip net.IP) netip.Addr { 440 // https://pkg.go.dev/net/netip#AddrFromSlice can return an IPv4 in IPv6 format 441 // so we have to check the IP family to return exactly the format that we want 442 // address, _ := netip.AddrFromSlice(net.ParseIPSloppy(192.168.0.1)) returns 443 // an address like ::ffff:192.168.0.1/32 444 bytes := ip.To4() 445 if bytes == nil { 446 bytes = ip.To16() 447 } 448 // AddrFromSlice returns Addr{}, false if the input is invalid. 449 address, _ := netip.AddrFromSlice(bytes) 450 return address 451 } 452 453 // Convert netutils.IPFamily to v1.IPFamily 454 // TODO: consolidate helpers 455 // copied from pkg/proxy/util/utils.go 456 func convertToV1IPFamily(ipFamily netutils.IPFamily) v1.IPFamily { 457 switch ipFamily { 458 case netutils.IPv4: 459 return v1.IPv4Protocol 460 case netutils.IPv6: 461 return v1.IPv6Protocol 462 } 463 464 return v1.IPFamilyUnknown 465 }